Skip to content

Commit 5a1ab39

Browse files
committed
HTTP request notification
- Add 2 internal notifications 1 - http_request_start 2 - http_request_complete - When a Network.enable CDP message is received, browser context registers for these 2 events (when Network.disable is called, it unregisters) - On http_request_start, CDP will emit a Network.requestWillBeSent message. This _does not_ include all the fields, but what we have appears to be enough for puppeteer.waitForNetworkIdle. - On http_request_complete, CDP will emit a Network.responseReceived message. This _does not_ include all the fields, but what we have appears to be enough for puppeteer.waitForNetworkIdle. We currently don't emit any other new events, including any network-specific lifecycleEvent (i.e. Chrome will emit a networkIdle and networkAlmostIdle). To support this, the following other things were done: - CDP now has a `notification_arena` which is re-used between browser contexts. Normally, CDP code runs based on a "cmd" which has its own message_arena, but these notifications happen out-of-band, so we needed a new arena which is valid for handling 1 notification. - HTTP Client is notification-aware. The SessionState no longer includes the *http.Client directly. It instead includes an http.RequestFactory which is the combination of the client + a specific configuration (i.e. *Notification). This ensures that all requests made from that factory have the same settings. - However, despite the above, _some_ requests do not appear to emit CDP events, such as loading a <script src="X">. So the page still deals directly with the *http.Client. - Playwright and Puppeteer (but Playwright in particular) are very sensitive to event ordering. These new events have introduced additional sensitivity. The result sent to Page.navigate had to be moved to inside the navigate event handler, which meant passing some cdp-specific data (the input.id) into the NavigateOpts. 
This is the only way I found to keep both happy - the sequence of events is closer to (but still pretty far from) what Chrome does.
1 parent eed3d27 commit 5a1ab39

File tree

9 files changed

+364
-41
lines changed

9 files changed

+364
-41
lines changed

src/browser/env.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ const storage = @import("storage/storage.zig");
77
const generate = @import("../runtime/generate.zig");
88
const Renderer = @import("renderer.zig").Renderer;
99
const Loop = @import("../runtime/loop.zig").Loop;
10-
const HttpClient = @import("../http/client.zig").Client;
10+
const RequestFactory = @import("../http/client.zig").RequestFactory;
1111

1212
const WebApis = struct {
1313
// Wrapped like this for debug ergonomics.
@@ -54,8 +54,8 @@ pub const SessionState = struct {
5454
window: *Window,
5555
renderer: *Renderer,
5656
arena: std.mem.Allocator,
57-
http_client: *HttpClient,
5857
cookie_jar: *storage.CookieJar,
58+
request_factory: RequestFactory,
5959

6060
// dangerous, but set by the JS framework
6161
// shorter-lived than the arena above, which

src/browser/page.zig

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ pub const Page = struct {
9898
.renderer = &self.renderer,
9999
.loop = browser.app.loop,
100100
.cookie_jar = &session.cookie_jar,
101-
.http_client = browser.http_client,
101+
.request_factory = browser.http_client.requestFactory(browser.notification),
102102
},
103103
.scope = try session.executor.startScope(&self.window, &self.state, self, true),
104104
.module_map = .empty,
@@ -174,6 +174,7 @@ pub const Page = struct {
174174
pub fn navigate(self: *Page, request_url: URL, opts: NavigateOpts) !void {
175175
const arena = self.arena;
176176
const session = self.session;
177+
const notification = session.browser.notification;
177178

178179
log.debug("starting GET {s}", .{request_url});
179180

@@ -195,10 +196,11 @@ pub const Page = struct {
195196
// load the data
196197
var request = try self.newHTTPRequest(.GET, &self.url, .{ .navigation = true });
197198
defer request.deinit();
199+
request.notification = notification;
198200

199-
session.browser.notification.dispatch(.page_navigate, &.{
201+
notification.dispatch(.page_navigate, &.{
202+
.opts = opts,
200203
.url = &self.url,
201-
.reason = opts.reason,
202204
.timestamp = timestamp(),
203205
});
204206

@@ -238,7 +240,7 @@ pub const Page = struct {
238240
self.raw_data = arr.items;
239241
}
240242

241-
session.browser.notification.dispatch(.page_navigated, &.{
243+
notification.dispatch(.page_navigated, &.{
242244
.url = &self.url,
243245
.timestamp = timestamp(),
244246
});
@@ -464,7 +466,9 @@ pub const Page = struct {
464466
}
465467

466468
fn newHTTPRequest(self: *const Page, method: http.Request.Method, url: *const URL, opts: storage.cookie.LookupOpts) !http.Request {
467-
var request = try self.state.http_client.request(method, &url.uri);
469+
// Don't use the state's request_factory here, since requests made by the
470+
// page (i.e. to load <scripts>) should not generate notifications.
471+
var request = try self.session.browser.http_client.request(method, &url.uri);
468472
errdefer request.deinit();
469473

470474
var arr: std.ArrayListUnmanaged(u8) = .{};
@@ -661,7 +665,8 @@ pub const NavigateReason = enum {
661665
address_bar,
662666
};
663667

664-
const NavigateOpts = struct {
668+
pub const NavigateOpts = struct {
669+
cdp_id: ?i64 = null,
665670
reason: NavigateReason = .address_bar,
666671
};
667672

src/browser/xhr/xhr.zig

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ const XMLHttpRequestBodyInit = union(enum) {
8080
pub const XMLHttpRequest = struct {
8181
proto: XMLHttpRequestEventTarget = XMLHttpRequestEventTarget{},
8282
arena: Allocator,
83-
client: *http.Client,
8483
request: ?http.Request = null,
8584

8685
priv_state: PrivState = .new,
@@ -252,7 +251,6 @@ pub const XMLHttpRequest = struct {
252251
.state = .unsent,
253252
.url = null,
254253
.origin_url = session_state.url,
255-
.client = session_state.http_client,
256254
.cookie_jar = session_state.cookie_jar,
257255
};
258256
}
@@ -420,7 +418,7 @@ pub const XMLHttpRequest = struct {
420418
self.send_flag = true;
421419
self.priv_state = .open;
422420

423-
self.request = try self.client.request(self.method, &self.url.?.uri);
421+
self.request = try session_state.request_factory.create(self.method, &self.url.?.uri);
424422
var request = &self.request.?;
425423
errdefer request.deinit();
426424

src/cdp/cdp.zig

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ pub fn CDPT(comptime TypeProvider: type) type {
6969
// 1 message at a time.
7070
message_arena: std.heap.ArenaAllocator,
7171

72+
// Used for processing notifications within a browser context.
73+
notification_arena: std.heap.ArenaAllocator,
74+
7275
const Self = @This();
7376

7477
pub fn init(app: *App, client: TypeProvider.Client) !Self {
@@ -82,6 +85,7 @@ pub fn CDPT(comptime TypeProvider: type) type {
8285
.allocator = allocator,
8386
.browser_context = null,
8487
.message_arena = std.heap.ArenaAllocator.init(allocator),
88+
.notification_arena = std.heap.ArenaAllocator.init(allocator),
8589
};
8690
}
8791

@@ -91,6 +95,7 @@ pub fn CDPT(comptime TypeProvider: type) type {
9195
}
9296
self.browser.deinit();
9397
self.message_arena.deinit();
98+
self.notification_arena.deinit();
9499
}
95100

96101
pub fn handleMessage(self: *Self, msg: []const u8) bool {
@@ -259,7 +264,7 @@ pub fn CDPT(comptime TypeProvider: type) type {
259264
});
260265
}
261266

262-
fn sendJSON(self: *Self, message: anytype) !void {
267+
pub fn sendJSON(self: *Self, message: anytype) !void {
263268
return self.client.sendJSON(message, .{
264269
.emit_null_optional_fields = false,
265270
});
@@ -283,6 +288,12 @@ pub fn BrowserContext(comptime CDP_T: type) type {
283288
// Points to the session arena
284289
arena: Allocator,
285290

291+
// From the parent's notification_arena.allocator(). Most of the CDP
292+
// code paths deal with a cmd which has its own arena (from the
293+
// message_arena). But notifications happen outside of the typical CDP
294+
// request->response, and thus don't have a cmd and don't have an arena.
295+
notification_arena: Allocator,
296+
286297
// Maps to our Page. (There are other types of targets, but we only
287298
// deal with "pages" for now). Since we only allow 1 open page at a
288299
// time, we only have 1 target_id.
@@ -336,6 +347,7 @@ pub fn BrowserContext(comptime CDP_T: type) type {
336347
.node_search_list = undefined,
337348
.isolated_world = null,
338349
.inspector = inspector,
350+
.notification_arena = cdp.notification_arena.allocator(),
339351
};
340352
self.node_search_list = Node.Search.List.init(allocator, &self.node_registry);
341353
errdefer self.deinit();
@@ -397,6 +409,16 @@ pub fn BrowserContext(comptime CDP_T: type) type {
397409
return if (raw_url.len == 0) null else raw_url;
398410
}
399411

412+
/// Registers this browser context for the `http_request_start` and
/// `http_request_complete` notifications, with `onHttpRequestStart` and
/// `onHttpRequestComplete` as the respective callbacks.
///
/// Returns any error produced by `register`. On failure no registration made
/// by this call is left behind.
pub fn networkEnable(self: *Self) !void {
    try self.cdp.browser.notification.register(.http_request_start, self, onHttpRequestStart);
    // If the second registration fails, undo the first one so we never end
    // up emitting request-start events without the matching completion
    // events.
    errdefer self.cdp.browser.notification.unregister(.http_request_start, self);
    try self.cdp.browser.notification.register(.http_request_complete, self, onHttpRequestComplete);
}
416+
417+
/// Removes this browser context's registrations for the
/// `http_request_start` and `http_request_complete` notifications.
pub fn networkDisable(self: *Self) void {
    const notification = self.cdp.browser.notification;
    notification.unregister(.http_request_start, self);
    notification.unregister(.http_request_complete, self);
}
421+
400422
pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void {
401423
const self: *Self = @alignCast(@ptrCast(ctx));
402424
return @import("domains/page.zig").pageRemove(self);
@@ -409,14 +431,31 @@ pub fn BrowserContext(comptime CDP_T: type) type {
409431

410432
pub fn onPageNavigate(ctx: *anyopaque, data: *const Notification.PageNavigate) !void {
411433
const self: *Self = @alignCast(@ptrCast(ctx));
412-
return @import("domains/page.zig").pageNavigate(self, data);
434+
defer self.resetNotificationArena();
435+
return @import("domains/page.zig").pageNavigate(self.notification_arena, self, data);
413436
}
414437

415438
/// Notification callback for a completed page navigation. `ctx` is the
/// type-erased browser context; the notification payload is forwarded to
/// the Page domain's `pageNavigated`.
pub fn onPageNavigated(ctx: *anyopaque, data: *const Notification.PageNavigated) !void {
    const page_domain = @import("domains/page.zig");
    const bc: *Self = @ptrCast(@alignCast(ctx));
    return page_domain.pageNavigated(bc, data);
}
419442

443+
/// Notification callback for `http_request_start`. `ctx` is the type-erased
/// browser context. The payload is forwarded to the Network domain's
/// `httpRequestStart`, which allocates from the notification arena.
pub fn onHttpRequestStart(ctx: *anyopaque, data: *const Notification.RequestStart) !void {
    const network_domain = @import("domains/network.zig");
    const bc: *Self = @ptrCast(@alignCast(ctx));
    // Runs on every exit path, so the arena is reset whether or not the
    // handler returns an error.
    defer bc.resetNotificationArena();
    return network_domain.httpRequestStart(bc.notification_arena, bc, data);
}
448+
449+
/// Notification callback for `http_request_complete`. `ctx` is the
/// type-erased browser context. The payload is forwarded to the Network
/// domain's `httpRequestComplete`, which allocates from the notification
/// arena.
pub fn onHttpRequestComplete(ctx: *anyopaque, data: *const Notification.RequestComplete) !void {
    const network_domain = @import("domains/network.zig");
    const bc: *Self = @ptrCast(@alignCast(ctx));
    // Runs on every exit path, so the arena is reset whether or not the
    // handler returns an error.
    defer bc.resetNotificationArena();
    return network_domain.httpRequestComplete(bc.notification_arena, bc, data);
}
454+
455+
/// Resets the CDP-owned notification arena, retaining at most 64KB of
/// capacity for the next notification. The result of `reset` is
/// deliberately discarded.
fn resetNotificationArena(self: *Self) void {
    // A plain statement: a `defer` on the only statement in the body adds
    // nothing.
    _ = self.cdp.notification_arena.reset(.{ .retain_with_limit = 1024 * 64 });
}
458+
420459
pub fn callInspector(self: *const Self, msg: []const u8) void {
421460
self.inspector.send(msg);
422461
// force running micro tasks after send input to the inspector.

src/cdp/domains/network.zig

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,130 @@
1717
// along with this program. If not, see <https://www.gnu.org/licenses/>.
1818

1919
const std = @import("std");
20+
const Notification = @import("../../notification.zig").Notification;
21+
22+
const Allocator = std.mem.Allocator;
2023

2124
pub fn processMessage(cmd: anytype) !void {
2225
const action = std.meta.stringToEnum(enum {
2326
enable,
27+
disable,
2428
setCacheDisabled,
2529
}, cmd.input.action) orelse return error.UnknownMethod;
2630

2731
switch (action) {
28-
.enable => return cmd.sendResult(null, .{}),
32+
.enable => return enable(cmd),
33+
.disable => return disable(cmd),
2934
.setCacheDisabled => return cmd.sendResult(null, .{}),
3035
}
3136
}
37+
38+
/// Handles `Network.enable`: asks the browser context to start listening for
/// HTTP request notifications, then sends an empty result.
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context.
fn enable(cmd: anytype) !void {
    if (cmd.browser_context) |bc| {
        try bc.networkEnable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}
43+
44+
/// Handles `Network.disable`: asks the browser context to stop listening for
/// HTTP request notifications, then sends an empty result.
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context.
fn disable(cmd: anytype) !void {
    if (cmd.browser_context) |bc| {
        bc.networkDisable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}
49+
50+
/// Emits a `Network.requestWillBeSent` CDP event for an HTTP request that is
/// about to be sent.
///
/// `arena` backs every temporary string and the header map built here;
/// nothing is freed by this function. `bc` is the browser context and
/// `request` is the notification payload describing the request.
///
/// Returns allocation errors and any error from `sendEvent`.
pub fn httpRequestStart(arena: Allocator, bc: anytype, request: *const Notification.RequestStart) !void {
    // Isn't possible to do a network request within a Browser (which our
    // notification is tied to), without a page.
    std.debug.assert(bc.session.page != null);

    var cdp = bc.cdp;

    // all unreachable because we _have_ to have a page.
    const session_id = bc.session_id orelse unreachable;
    const target_id = bc.target_id orelse unreachable;
    const page = bc.session.currentPage() orelse unreachable;

    const document_url = try urlToString(arena, &page.url.uri, .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    });

    const request_url = try urlToString(arena, request.url, .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    });

    // std.Uri.writeToStream ignores the `fragment` option unless `path` is
    // also set, so asking it for "only the fragment" always yields an empty
    // string. Format the fragment component directly instead; the result
    // starts with '#'. An absent fragment stays an empty string, as before.
    const request_fragment: []const u8 = if (request.url.fragment) |fragment|
        try std.fmt.allocPrint(arena, "#{fragment}", .{fragment})
    else
        "";

    var headers: std.StringArrayHashMapUnmanaged([]const u8) = .empty;
    try headers.ensureTotalCapacity(arena, request.headers.len);
    for (request.headers) |header| {
        headers.putAssumeCapacity(header.name, header.value);
    }

    // We're missing a bunch of fields, but, for now, this seems like enough
    try cdp.sendEvent("Network.requestWillBeSent", .{
        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id}),
        .frameId = target_id,
        .loaderId = bc.loader_id,
        .documentUrl = document_url,
        .request = .{
            .url = request_url,
            .urlFragment = request_fragment,
            .method = @tagName(request.method),
            .hasPostData = request.has_body,
            .headers = std.json.ArrayHashMap([]const u8){ .map = headers },
        },
    }, .{ .session_id = session_id });
}
103+
104+
/// Emits a `Network.responseReceived` CDP event for a completed HTTP
/// request.
///
/// `arena` backs every temporary string and the header map built here;
/// nothing is freed by this function. `bc` is the browser context and
/// `request` is the notification payload describing the completed request.
///
/// Returns allocation errors and any error from `sendEvent`.
pub fn httpRequestComplete(arena: Allocator, bc: anytype, request: *const Notification.RequestComplete) !void {
    // A network request cannot happen within a Browser (which our
    // notification is tied to) unless there is a page.
    std.debug.assert(bc.session.page != null);

    // Both must be set, because we _have_ to have a page.
    const session_id = bc.session_id.?;
    const target_id = bc.target_id.?;

    const response_url = try urlToString(arena, request.url, .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    });

    // Capacity is reserved up front, so the inserts below cannot fail.
    var header_map: std.StringArrayHashMapUnmanaged([]const u8) = .empty;
    try header_map.ensureTotalCapacity(arena, request.headers.len);
    for (request.headers) |h| header_map.putAssumeCapacity(h.name, h.value);

    const request_id = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id});

    // Many fields are still missing but, for now, this seems like enough.
    try bc.cdp.sendEvent("Network.responseReceived", .{
        .requestId = request_id,
        .frameId = target_id,
        .loaderId = bc.loader_id,
        .response = .{
            .url = response_url,
            .status = request.status,
            .headers = std.json.ArrayHashMap([]const u8){ .map = header_map },
        },
    }, .{ .session_id = session_id });
}
141+
142+
/// Renders the components of `url` selected by `opts` into a string
/// allocated from `arena`. The returned slice is never freed here; it is
/// backed by memory obtained from `arena`.
fn urlToString(arena: Allocator, url: *const std.Uri, opts: std.Uri.WriteToStreamOptions) ![]const u8 {
    var rendered: std.ArrayListUnmanaged(u8) = .empty;
    const writer = rendered.writer(arena);
    try url.writeToStream(opts, writer);
    return rendered.items;
}

0 commit comments

Comments
 (0)