@@ -90,16 +90,6 @@ pub const Page = struct {
9090
9191 load_state : LoadState = .parsing ,
9292
93- // Page.wait balances waiting for resources / tasks and producing an output.
94- // Up until a timeout, Page.wait will always wait for inflight or pending
95- // HTTP requests, via the Http.Client.active counter. However, intercepted
96- // requests (via CDP, but it could be anything), aren't considered "active"
97- // connection. So it's possible that we have intercepted requests (which are
98- // pending on some driver to continue/abort) while Http.Client.active == 0.
99- // This boolean exists to supplment Http.Client.active and inform Page.wait
100- // of pending connections.
101- request_intercepted : bool = false ,
102-
10393 const Mode = union (enum ) {
10494 pre : void ,
10595 err : anyerror ,
@@ -262,23 +252,26 @@ pub const Page = struct {
262252 return self .script_manager .blockingGet (src );
263253 }
264254
265- pub fn wait (self : * Page , wait_sec : u16 ) void {
266- self ._wait (wait_sec ) catch | err | switch (err ) {
267- error .JsError = > {}, // already logged (with hopefully more context)
268- else = > {
269- // There may be errors from the http/client or ScriptManager
270- // that we should not treat as an error like this. Will need
271- // to run this through more real-world sites and see if we need
272- // to expand the switch (err) to have more customized logs for
273- // specific messages.
274- log .err (.browser , "page wait" , .{ .err = err });
275- },
255+ pub fn wait (self : * Page , wait_ms : i32 ) Session.WaitResult {
256+ return self ._wait (wait_ms ) catch | err | {
257+ switch (err ) {
258+ error .JsError = > {}, // already logged (with hopefully more context)
259+ else = > {
260+ // There may be errors from the http/client or ScriptManager
261+ // that we should not treat as an error like this. Will need
262+ // to run this through more real-world sites and see if we need
263+ // to expand the switch (err) to have more customized logs for
264+ // specific messages.
265+ log .err (.browser , "page wait" , .{ .err = err });
266+ },
267+ }
268+ return .done ;
276269 };
277270 }
278271
279- fn _wait (self : * Page , wait_sec : u16 ) ! void {
272+ fn _wait (self : * Page , wait_ms : i32 ) ! Session.WaitResult {
280273 var timer = try std .time .Timer .start ();
281- var ms_remaining : i32 = @intCast ( wait_sec * 1000 ) ;
274+ var ms_remaining = wait_ms ;
282275
283276 var try_catch : Env.TryCatch = undefined ;
284277 try_catch .init (self .main_context );
@@ -287,112 +280,113 @@ pub const Page = struct {
287280 var scheduler = & self .scheduler ;
288281 var http_client = self .http_client ;
289282
283+ // I'd like the page to know NOTHING about extra_socket / CDP, but the
284+ // fact is that the behavior of wait changes depending on whether or
285+ // not we're using CDP.
286+ // If we aren't using CDP, as soon as we think there's nothing left
287+ // to do, we can exit - we'de done.
288+ // But if we are using CDP, we should wait for the whole `wait_ms`
289+ // because the http_click.tick() also monitors the CDP socket. And while
290+ // we could let CDP poll http (like it does for HTTP requests), the fact
291+ // is that we know more about the timing of stuff (e.g. how long to
292+ // poll/sleep) in the page.
293+ const exit_when_done = http_client .extra_socket == null ;
294+
290295 // for debugging
291296 // defer self.printWaitAnalysis();
292297
293298 while (true ) {
294- SW : switch (self .mode ) {
299+ switch (self .mode ) {
295300 .pre , .raw , .text = > {
296- if (self .request_intercepted ) {
297- // the page request was intercepted.
298-
299- // there shouldn't be any active requests;
300- std .debug .assert (http_client .active == 0 );
301-
302- // nothing we can do for this, need to kick the can up
303- // the chain and wait for activity (e.g. a CDP message)
304- // to unblock this.
305- return ;
306- }
307-
308301 // The main page hasn't started/finished navigating.
309302 // There's no JS to run, and no reason to run the scheduler.
310- if (http_client .active == 0 ) {
303+ if (http_client .active == 0 and exit_when_done ) {
311304 // haven't started navigating, I guess.
312- return ;
305+ return .done ;
313306 }
314307
315- // There should only be 1 active http transfer, the main page
316- _ = try http_client .tick (.{ .timeout_ms = ms_remaining });
308+ // Either we have active http connections, or we're in CDP
309+ // mode with an extra socket. Either way, we're waiting
310+ // for http traffic
311+ if (try http_client .tick (ms_remaining ) == .extra_socket ) {
312+ // data on a socket we aren't handling, return to caller
313+ return .extra_socket ;
314+ }
317315 },
318316 .html , .parsed = > {
319317 // The HTML page was parsed. We now either have JS scripts to
320- // download, or timeouts to execute, or both.
318+ // download, or scheduled tasks to execute, or both.
321319
322320 // scheduler.run could trigger new http transfers, so do not
323321 // store http_client.active BEFORE this call and then use
324322 // it AFTER.
325323 const ms_to_next_task = try scheduler .runHighPriority ();
324+ _ = try scheduler .runLowPriority ();
326325
327326 if (try_catch .hasCaught ()) {
328327 const msg = (try try_catch .err (self .arena )) orelse "unknown" ;
329328 log .warn (.user_script , "page wait" , .{ .err = msg , .src = "scheduler" });
330329 return error .JsError ;
331330 }
332331
333- if (http_client .active == 0 ) {
334- if (ms_to_next_task ) | ms | {
335- // There are no HTTP transfers, so there's no point calling
336- // http_client.tick.
337- // TODO: should we just force-run the scheduler??
338-
339- if (ms > ms_remaining ) {
340- // we'd wait to long, might as well exit early.
341- return ;
332+ if (http_client .active == 0 and exit_when_done ) {
333+ const ms = ms_to_next_task orelse blk : {
334+ // TODO: when jsRunner is fully replaced with the
335+ // htmlRunner, we can remove the first part of this
336+ // condition. jsRunner calls `page.wait` far too
337+ // often to enforce this.
338+ if (wait_ms > 100 and wait_ms - ms_remaining < 100 ) {
339+ // Look, we want to exit ASAP, but we don't want
340+ // to exit so fast that we've run none of the
341+ // background jobs.
342+ break :blk 50 ;
342343 }
343- _ = try scheduler . runLowPriority ();
344-
345- // We must use a u64 here b/c ms is a u32 and the
346- // conversion to ns can generate an integer
347- // overflow.
348- const _ms : u64 = @intCast (ms );
349-
350- std . Thread . sleep ( std . time . ns_per_ms * _ms );
351- break : SW ;
344+ // no http transfers, no cdp extra socket, no
345+ // scheduled tasks, we're done.
346+ return .done ;
347+ };
348+
349+ if (ms > ms_remaining ) {
350+ // same as above, except we have a scheduled task,
351+ // it just happens to be too far into the future.s
352+ return .done ;
352353 }
353354
354- // We have no active http transfer and no pending
355- // schedule tasks. We're done
356- return ;
357- }
358-
359- _ = try scheduler .runLowPriority ();
360-
361- const request_intercepted = self .request_intercepted ;
362-
363- // We want to prioritize processing intercepted requests
364- // because, the sooner they get unblocked, the sooner we
365- // can start the HTTP request. But we still want to advanced
366- // existing HTTP requests, if possible. So, if we have
367- // intercepted requests, we'll still look at existing HTTP
368- // requests, but we won't block waiting for more data.
369- const ms_to_wait =
370- if (request_intercepted ) 0
371-
372- // But if we have no intercepted requests, we'll wait
373- // for as long as we can for data to our existing
374- // inflight requests
375- else @min (ms_remaining , ms_to_next_task orelse 1000 );
376-
377- _ = try http_client .tick (.{ .timeout_ms = ms_to_wait });
378-
379- if (request_intercepted ) {
380- // Again, proritizing intercepted requests. Exit this
381- // loop so that our caller can hopefully resolve them
382- // (i.e. continue or abort them);
383- return ;
355+ // we have a task to run in the not-so-distant future.
356+ // You might think we can just sleep until that task is
357+ // ready, but we should continue to run lowPriority tasks
358+ // in the meantime, and that could unblock things. So
359+ // we'll just sleep for a bit, and then restart our wait
360+ // loop to see what's changed
361+ std .Thread .sleep (std .time .ns_per_ms * @as (u64 , @intCast (@min (ms , 20 ))));
362+ } else {
363+ // We're here because we either have active HTTP
364+ // connections, of exit_when_done == false (aka, there's
365+ // an extra_socket registered with the http client).
366+ const ms_to_wait = @min (ms_remaining , ms_to_next_task orelse 100 );
367+ if (try http_client .tick (ms_to_wait ) == .extra_socket ) {
368+ // data on a socket we aren't handling, return to caller
369+ return .extra_socket ;
370+ }
384371 }
385372 },
386373 .err = > | err | {
387374 self .mode = .{ .raw_done = @errorName (err ) };
388375 return err ;
389376 },
390- .raw_done = > return ,
377+ .raw_done = > {
378+ if (exit_when_done ) {
379+ return .done ;
380+ }
381+ // we _could_ http_client.tick(ms_to_wait), but this has
382+ // the same result, and I feel is more correct.
383+ return .no_page ;
384+ }
391385 }
392386
393387 const ms_elapsed = timer .lap () / 1_000_000 ;
394388 if (ms_elapsed >= ms_remaining ) {
395- return ;
389+ return .done ;
396390 }
397391 ms_remaining -= @intCast (ms_elapsed );
398392 }
@@ -405,48 +399,53 @@ pub const Page = struct {
405399 std .debug .print ("\n active requests: {d}\n " , .{self .http_client .active });
406400 var n_ = self .http_client .handles .in_use .first ;
407401 while (n_ ) | n | {
408- const transfer = Http .Transfer .fromEasy (n .data .conn .easy ) catch | err | {
402+ const handle : * Http.Client.Handle = @fieldParentPtr ("node" , n );
403+ const transfer = Http .Transfer .fromEasy (handle .conn .easy ) catch | err | {
409404 std .debug .print (" - failed to load transfer: {any}\n " , .{err });
410405 break ;
411406 };
412- std .debug .print (" - {s }\n " , .{transfer });
407+ std .debug .print (" - {f }\n " , .{transfer });
413408 n_ = n .next ;
414409 }
415410 }
416411
417412 {
418- std .debug .print ("\n queued requests: {d}\n " , .{self .http_client .queue .len });
413+ std .debug .print ("\n queued requests: {d}\n " , .{self .http_client .queue .len () });
419414 var n_ = self .http_client .queue .first ;
420415 while (n_ ) | n | {
421- std .debug .print (" - {s}\n " , .{n .data .url });
416+ const transfer : * Http.Transfer = @fieldParentPtr ("_node" , n );
417+ std .debug .print (" - {f}\n " , .{transfer .uri });
422418 n_ = n .next ;
423419 }
424420 }
425421
426422 {
427- std .debug .print ("\n scripts: {d}\n " , .{self .script_manager .scripts .len });
423+ std .debug .print ("\n scripts: {d}\n " , .{self .script_manager .scripts .len () });
428424 var n_ = self .script_manager .scripts .first ;
429425 while (n_ ) | n | {
430- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
426+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
427+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
431428 n_ = n .next ;
432429 }
433430 }
434431
435432 {
436- std .debug .print ("\n deferreds: {d}\n " , .{self .script_manager .deferreds .len });
433+ std .debug .print ("\n deferreds: {d}\n " , .{self .script_manager .deferreds .len () });
437434 var n_ = self .script_manager .deferreds .first ;
438435 while (n_ ) | n | {
439- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
436+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
437+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
440438 n_ = n .next ;
441439 }
442440 }
443441
444442 const now = std .time .milliTimestamp ();
445443 {
446- std .debug .print ("\n asyncs: {d}\n " , .{self .script_manager .asyncs .len });
444+ std .debug .print ("\n asyncs: {d}\n " , .{self .script_manager .asyncs .len () });
447445 var n_ = self .script_manager .asyncs .first ;
448446 while (n_ ) | n | {
449- std .debug .print (" - {s} complete: {any}\n " , .{ n .data .script .url , n .data .complete });
447+ const ps : * ScriptManager.PendingScript = @fieldParentPtr ("node" , n );
448+ std .debug .print (" - {s} complete: {any}\n " , .{ ps .script .url , ps .complete });
450449 n_ = n .next ;
451450 }
452451 }
0 commit comments