@@ -90,16 +90,6 @@ pub const Page = struct {
9090
9191 load_state : LoadState = .parsing ,
9292
93- // Page.wait balances waiting for resources / tasks and producing an output.
94- // Up until a timeout, Page.wait will always wait for inflight or pending
95- // HTTP requests, via the Http.Client.active counter. However, intercepted
96- // requests (via CDP, but it could be anything), aren't considered "active"
97- // connection. So it's possible that we have intercepted requests (which are
98- // pending on some driver to continue/abort) while Http.Client.active == 0.
99- // This boolean exists to supplment Http.Client.active and inform Page.wait
100- // of pending connections.
101- request_intercepted : bool = false ,
102-
10393 const Mode = union (enum ) {
10494 pre : void ,
10595 err : anyerror ,
@@ -262,23 +252,26 @@ pub const Page = struct {
262252 return self .script_manager .blockingGet (src );
263253 }
264254
265- pub fn wait (self : * Page , wait_sec : u16 ) void {
266- self ._wait (wait_sec ) catch | err | switch (err ) {
267- error .JsError = > {}, // already logged (with hopefully more context)
268- else = > {
269- // There may be errors from the http/client or ScriptManager
270- // that we should not treat as an error like this. Will need
271- // to run this through more real-world sites and see if we need
272- // to expand the switch (err) to have more customized logs for
273- // specific messages.
274- log .err (.browser , "page wait" , .{ .err = err });
275- },
255+ pub fn wait (self : * Page , wait_ms : i32 ) Session.WaitResult {
256+ return self ._wait (wait_ms ) catch | err | {
257+ switch (err ) {
258+ error .JsError = > {}, // already logged (with hopefully more context)
259+ else = > {
260+ // There may be errors from the http/client or ScriptManager
261+ // that we should not treat as an error like this. Will need
262+ // to run this through more real-world sites and see if we need
263+ // to expand the switch (err) to have more customized logs for
264+ // specific messages.
265+ log .err (.browser , "page wait" , .{ .err = err });
266+ },
267+ }
268+ return .done ;
276269 };
277270 }
278271
279- fn _wait (self : * Page , wait_sec : u16 ) ! void {
272+ fn _wait (self : * Page , wait_ms : i32 ) ! Session.WaitResult {
280273 var timer = try std .time .Timer .start ();
281- var ms_remaining : i32 = @intCast ( wait_sec * 1000 ) ;
274+ var ms_remaining = wait_ms ;
282275
283276 var try_catch : Env.TryCatch = undefined ;
284277 try_catch .init (self .main_context );
@@ -287,40 +280,42 @@ pub const Page = struct {
287280 var scheduler = & self .scheduler ;
288281 var http_client = self .http_client ;
289282
283+ // I'd like the page to know NOTHING about extra_socket / CDP, but the
284+ // fact is that the behavior of wait changes depending on whether or
285+ // not we're using CDP.
286+ // If we aren't using CDP, as soon as we think there's nothing left
287+ // to do, we can exit - we'de done.
288+ // But if we are using CDP, we should wait for the whole `wait_ms`
289+ // because the http_click.tick() also monitors the CDP socket. And while
290+ // we could let CDP poll http (like it does for HTTP requests), the fact
291+ // is that we know more about the timing of stuff (e.g. how long to
292+ // poll/sleep) in the page.
293+ const exit_when_done = http_client .extra_socket == null ;
294+
290295 // for debugging
291296 // defer self.printWaitAnalysis();
292297
293298 while (true ) {
294- SW : switch (self .mode ) {
299+ switch (self .mode ) {
295300 .pre , .raw , .text = > {
296- if (self .request_intercepted ) {
297- // the page request was intercepted.
298-
299- // there shouldn't be any active requests;
300- std .debug .assert (http_client .active == 0 );
301-
302- // nothing we can do for this, need to kick the can up
303- // the chain and wait for activity (e.g. a CDP message)
304- // to unblock this.
305- return ;
306- }
307-
308301 // The main page hasn't started/finished navigating.
309302 // There's no JS to run, and no reason to run the scheduler.
310- if (http_client .active == 0 ) {
303+ if (http_client .active == 0 and exit_when_done ) {
311304 // haven't started navigating, I guess.
312- return ;
305+ return .done ;
313306 }
314307
315- // There should only be 1 active http transfer, the main page
308+ // Either we have active http connections, or we're in CDP
309+ // mode with an extra socket. Either way, we're waiting
310+ // for http traffic
316311 if (try http_client .tick (ms_remaining ) == .extra_socket ) {
317312 // data on a socket we aren't handling, return to caller
318- return ;
313+ return .extra_socket ;
319314 }
320315 },
321316 .html , .parsed = > {
322317 // The HTML page was parsed. We now either have JS scripts to
323- // download, or timeouts to execute, or both.
318+ // download, or scheduled tasks to execute, or both.
324319
325320 // scheduler.run could trigger new http transfers, so do not
326321 // store http_client.active BEFORE this call and then use
@@ -333,72 +328,56 @@ pub const Page = struct {
333328 return error .JsError ;
334329 }
335330
336- if (http_client .active == 0 ) {
337- if (ms_to_next_task ) | ms | {
338- // There are no HTTP transfers, so there's no point calling
339- // http_client.tick.
340- // TODO: should we just force-run the scheduler??
341-
342- if (ms > ms_remaining ) {
343- // we'd wait to long, might as well exit early.
344- return ;
345- }
346- _ = try scheduler .runLowPriority ();
347-
348- // We must use a u64 here b/c ms is a u32 and the
349- // conversion to ns can generate an integer
350- // overflow.
351- const _ms : u64 = @intCast (ms );
352-
353- std .Thread .sleep (std .time .ns_per_ms * _ms );
354- break :SW ;
331+ if (http_client .active == 0 and exit_when_done ) {
332+ const ms = ms_to_next_task orelse {
333+ // no http transfers, no cdp extra socket, no
334+ // scheduled tasks, we're done.
335+ return .done ;
336+ };
337+
338+ if (ms > ms_remaining ) {
339+ // same as above, except we have a scheduled task,
340+ // it just happens to be too far into the future.s
341+ return .done ;
355342 }
356343
357- // We have no active http transfer and no pending
358- // schedule tasks. We're done
359- return ;
360- }
361-
362- _ = try scheduler .runLowPriority ();
363-
364- const request_intercepted = self .request_intercepted ;
365-
366- // We want to prioritize processing intercepted requests
367- // because, the sooner they get unblocked, the sooner we
368- // can start the HTTP request. But we still want to advanced
369- // existing HTTP requests, if possible. So, if we have
370- // intercepted requests, we'll still look at existing HTTP
371- // requests, but we won't block waiting for more data.
372- const ms_to_wait =
373- if (request_intercepted ) 0
374-
375- // But if we have no intercepted requests, we'll wait
376- // for as long as we can for data to our existing
377- // inflight requests
378- else @min (ms_remaining , ms_to_next_task orelse 1000 );
379-
380- if (try http_client .tick (ms_to_wait ) == .extra_socket ) {
381- // data on a socket we aren't handling, return to caller
382- return ;
383- }
384-
385- if (request_intercepted ) {
386- // Again, proritizing intercepted requests. Exit this
387- // loop so that our caller can hopefully resolve them
388- // (i.e. continue or abort them);
389- return ;
344+ _ = try scheduler .runLowPriority ();
345+ // we have a task to run in the not-so-distant future.
346+ // You might think we can just sleep until that task is
347+ // ready, but we should continue to run lowPriority tasks
348+ // in the meantime, and that could unblock things. So
349+ // we'll just sleep for a bit, and then restart our wait
350+ // loop to see what's changed
351+ std .Thread .sleep (std .time .ns_per_ms * @as (u64 , @intCast (@min (ms , 20 ))));
352+ } else {
353+ // We're here because we either have active HTTP
354+ // connections, of exit_when_done == false (aka, there's
355+ // an extra_socket registered with the http client).
356+ _ = try scheduler .runLowPriority ();
357+ const ms_to_wait = @min (ms_remaining , ms_to_next_task orelse 100 );
358+ if (try http_client .tick (ms_to_wait ) == .extra_socket ) {
359+ // data on a socket we aren't handling, return to caller
360+ return .extra_socket ;
361+ }
390362 }
391363 },
392364 .err = > | err | {
393365 self .mode = .{ .raw_done = @errorName (err ) };
394366 return err ;
395367 },
396- .raw_done = > return ,
368+ .raw_done = > {
369+ if (exit_when_done ) {
370+ return .done ;
371+ }
372+ // we _could_ http_client.tick(ms_to_wait), but this has
373+ // the same result, and I feel is more correct.
374+ return .no_page ;
375+ }
397376 }
398377
399378 const ms_elapsed = timer .lap () / 1_000_000 ;
400379 if (ms_elapsed >= ms_remaining ) {
401- return ;
380+ return .done ;
402381 }
403382 ms_remaining -= @intCast (ms_elapsed );
404383 }
@@ -411,48 +390,53 @@ pub const Page = struct {
411390 std .debug .print ("\n active requests: {d}\n " , .{self .http_client .active });
412391 var n_ = self .http_client .handles .in_use .first ;
413392 while (n_ ) | n | {
414- const transfer = Http .Transfer .fromEasy (n .data .conn .easy ) catch | err | {
393+ const handle : * Http.Client.Handle = @fieldParentPtr ("node" , n );
394+ const transfer = Http .Transfer .fromEasy (handle .conn .easy ) catch | err | {
415395 std .debug .print (" - failed to load transfer: {any}\n " , .{err });
416396 break ;
417397 };
418- std .debug .print (" - {s }\n " , .{transfer });
398+ std .debug .print (" - {f }\n " , .{transfer });
419399 n_ = n .next ;
420400 }
421401 }
422402
423403 {
424- std .debug .print ("\n queued requests: {d}\n " , .{self .http_client .queue .len });
404+ std .debug .print ("\n queued requests: {d}\n " , .{self .http_client .queue .len () });
425405 var n_ = self .http_client .queue .first ;
426406 while (n_ ) | n | {
427- std .debug .print (" - {s}\n " , .{n .data .url });
407+ const transfer : * Http.Transfer = @fieldParentPtr ("_node" , n );
408+ std .debug .print (" - {f}\n " , .{transfer .uri });
428409 n_ = n .next ;
429410 }
430411 }
431412
432413 {
433- std .debug .print ("\n scripts: {d}\n " , .{self .script_manager .scripts .len });
414+ std .debug .print ("\n scripts: {d}\n " , .{self .script_manager .scripts .len () });
434415 var n_ = self .script_manager .scripts .first ;
435416 while (n_ ) | n | {
436- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
417+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
418+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
437419 n_ = n .next ;
438420 }
439421 }
440422
441423 {
442- std .debug .print ("\n deferreds: {d}\n " , .{self .script_manager .deferreds .len });
424+ std .debug .print ("\n deferreds: {d}\n " , .{self .script_manager .deferreds .len () });
443425 var n_ = self .script_manager .deferreds .first ;
444426 while (n_ ) | n | {
445- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
427+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
428+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
446429 n_ = n .next ;
447430 }
448431 }
449432
450433 const now = std .time .milliTimestamp ();
451434 {
452- std .debug .print ("\n asyncs: {d}\n " , .{self .script_manager .asyncs .len });
435+ std .debug .print ("\n asyncs: {d}\n " , .{self .script_manager .asyncs .len () });
453436 var n_ = self .script_manager .asyncs .first ;
454437 while (n_ ) | n | {
455- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
438+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
439+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
456440 n_ = n .next ;
457441 }
458442 }
0 commit comments