Skip to content

Commit 0122360

Browse files
committed
Reduce allocations made during request interception
Stream (to json) the Transfer as a request and response object in the various network interception-related events (e.g. Network.responseReceived). Add a page.request_intercepted boolean flag for CDP to signal the page that requests have been intercepted, allowing Page.wait to prioritize intercept handling (or, at least, not block it).
1 parent d9ed4cf commit 0122360

File tree

6 files changed

+279
-154
lines changed

6 files changed

+279
-154
lines changed

src/browser/page.zig

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ pub const Page = struct {
9494

9595
load_state: LoadState = .parsing,
9696

97+
// Page.wait balances waiting for resources / tasks and producing an output.
98+
// Up until a timeout, Page.wait will always wait for inflight or pending
99+
// HTTP requests, via the Http.Client.active counter. However, intercepted
100+
// requests (via CDP, but it could be anything), aren't considered "active"
101+
// connection. So it's possible that we have intercepted requests (which are
102+
// pending on some driver to continue/abort) while Http.Client.active == 0.
103+
// This boolean exists to supplment Http.Client.active and inform Page.wait
104+
// of pending connections.
105+
request_intercepted: bool = false,
106+
97107
const Mode = union(enum) {
98108
pre: void,
99109
err: anyerror,
@@ -275,16 +285,26 @@ pub const Page = struct {
275285
while (true) {
276286
SW: switch (self.mode) {
277287
.pre, .raw => {
288+
if (self.request_intercepted) {
289+
// the page request was intercepted.
290+
291+
// there shouldn't be any active requests;
292+
std.debug.assert(http_client.active == 0);
293+
294+
// nothing we can do for this, need to kick the can up
295+
// the chain and wait for activity (e.g. a CDP message)
296+
// to unblock this.
297+
return;
298+
}
299+
278300
// The main page hasn't started/finished navigating.
279301
// There's no JS to run, and no reason to run the scheduler.
280-
281302
if (http_client.active == 0) {
282303
// haven't started navigating, I guess.
283304
return;
284305
}
285306

286307
// There should only be 1 active http transfer, the main page
287-
std.debug.assert(http_client.active == 1);
288308
try http_client.tick(ms_remaining);
289309
},
290310
.html, .parsed => {
@@ -330,17 +350,29 @@ pub const Page = struct {
330350

331351
_ = try scheduler.runLowPriority();
332352

333-
// We'll block here, waiting for network IO. We know
334-
// when the next timeout is scheduled, and we know how long
335-
// the caller wants to wait for, so we can pick a good wait
336-
// duration
337-
const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 1000);
353+
const request_intercepted = self.request_intercepted;
354+
355+
// We want to prioritize processing intercepted requests
356+
// because, the sooner they get unblocked, the sooner we
357+
// can start the HTTP request. But we still want to advanced
358+
// existing HTTP requests, if possible. So, if we have
359+
// intercepted requests, we'll still look at existing HTTP
360+
// requests, but we won't block waiting for more data.
361+
const ms_to_wait =
362+
if (request_intercepted) 0
363+
364+
// But if we have no intercepted requests, we'll wait
365+
// for as long as we can for data to our existing
366+
// inflight requests
367+
else @min(ms_remaining, ms_to_next_task orelse 1000);
368+
338369
try http_client.tick(ms_to_wait);
339370

340-
if (try_catch.hasCaught()) {
341-
const msg = (try try_catch.err(self.arena)) orelse "unknown";
342-
log.warn(.user_script, "page wait", .{ .err = msg, .src = "data" });
343-
return error.JsError;
371+
if (request_intercepted) {
372+
// Again, proritizing intercepted requests. Exit this
373+
// loop so that our caller can hopefully resolve them
374+
// (i.e. continue or abort them);
375+
return;
344376
}
345377
},
346378
.err => |err| return err,

src/cdp/cdp.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ pub fn BrowserContext(comptime CDP_T: type) type {
495495
pub fn onHttpRequestIntercept(ctx: *anyopaque, data: *const Notification.RequestIntercept) !void {
496496
const self: *Self = @alignCast(@ptrCast(ctx));
497497
defer self.resetNotificationArena();
498-
try @import("domains/fetch.zig").requestPaused(self.notification_arena, self, data);
498+
try @import("domains/fetch.zig").requestIntercept(self.notification_arena, self, data);
499499
}
500500

501501
pub fn onHttpRequestFail(ctx: *anyopaque, data: *const Notification.RequestFail) !void {

src/cdp/domains/fetch.zig

Lines changed: 70 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@
1818

1919
const std = @import("std");
2020
const Allocator = std.mem.Allocator;
21-
const Notification = @import("../../notification.zig").Notification;
21+
2222
const log = @import("../../log.zig");
23+
const network = @import("network.zig");
24+
2325
const Method = @import("../../http/Client.zig").Method;
2426
const Transfer = @import("../../http/Client.zig").Transfer;
27+
const Notification = @import("../../notification.zig").Notification;
2528

2629
pub fn processMessage(cmd: anytype) !void {
2730
const action = std.meta.stringToEnum(enum {
@@ -41,22 +44,38 @@ pub fn processMessage(cmd: anytype) !void {
4144

4245
// Stored in CDP
4346
pub const InterceptState = struct {
44-
const Self = @This();
45-
waiting: std.AutoArrayHashMap(u64, *Transfer),
47+
allocator: Allocator,
48+
waiting: std.AutoArrayHashMapUnmanaged(u64, *Transfer),
4649

4750
pub fn init(allocator: Allocator) !InterceptState {
4851
return .{
49-
.waiting = std.AutoArrayHashMap(u64, *Transfer).init(allocator),
52+
.waiting = .empty,
53+
.allocator = allocator,
5054
};
5155
}
5256

53-
pub fn deinit(self: *Self) void {
54-
self.waiting.deinit();
57+
pub fn empty(self: *const InterceptState) bool {
58+
return self.waiting.count() == 0;
59+
}
60+
61+
pub fn put(self: *InterceptState, transfer: *Transfer) !void {
62+
return self.waiting.put(self.allocator, transfer.id, transfer);
63+
}
64+
65+
pub fn remove(self: *InterceptState, id: u64) ?*Transfer {
66+
const entry = self.waiting.fetchSwapRemove(id) orelse return null;
67+
return entry.value;
68+
}
69+
70+
pub fn deinit(self: *InterceptState) void {
71+
self.waiting.deinit(self.allocator);
5572
}
5673
};
5774

5875
const RequestPattern = struct {
59-
urlPattern: []const u8 = "*", // Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
76+
// Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed.
77+
// Escape character is backslash. Omitting is equivalent to "*".
78+
urlPattern: []const u8 = "*",
6079
resourceType: ?ResourceType = null,
6180
requestStage: RequestStage = .Request,
6281
};
@@ -115,60 +134,46 @@ fn disable(cmd: anytype) !void {
115134

116135
fn enable(cmd: anytype) !void {
117136
const params = (try cmd.params(EnableParam)) orelse EnableParam{};
118-
if (params.patterns.len != 0) log.warn(.cdp, "Fetch.enable No patterns yet", .{});
119-
if (params.handleAuthRequests) log.warn(.cdp, "Fetch.enable No auth yet", .{});
137+
if (params.patterns.len != 0) {
138+
log.warn(.cdp, "not implemented", .{ .feature = "Fetch.enable No patterns yet" });
139+
}
140+
if (params.handleAuthRequests) {
141+
log.warn(.cdp, "not implemented", .{ .feature = "Fetch.enable No auth yet" });
142+
}
120143

121144
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
122145
try bc.fetchEnable();
123146

124147
return cmd.sendResult(null, .{});
125148
}
126149

127-
pub fn requestPaused(arena: Allocator, bc: anytype, intercept: *const Notification.RequestIntercept) !void {
150+
pub fn requestIntercept(arena: Allocator, bc: anytype, intercept: *const Notification.RequestIntercept) !void {
128151
var cdp = bc.cdp;
129152

130153
// unreachable because we _have_ to have a page.
131154
const session_id = bc.session_id orelse unreachable;
132155
const target_id = bc.target_id orelse unreachable;
156+
const page = bc.session.currentPage() orelse unreachable;
133157

134158
// We keep it around to wait for modifications to the request.
135159
// NOTE: we assume whomever created the request created it with a lifetime of the Page.
136160
// TODO: What to do when receiving replies for a previous page's requests?
137161

138162
const transfer = intercept.transfer;
139-
try cdp.intercept_state.waiting.put(transfer.id, transfer);
140-
141-
// NOTE: .request data preparation is duped from network.zig
142-
const full_request_url = transfer.uri;
143-
const request_url = try @import("network.zig").urlToString(arena, &full_request_url, .{
144-
.scheme = true,
145-
.authentication = true,
146-
.authority = true,
147-
.path = true,
148-
.query = true,
149-
});
150-
const request_fragment = try @import("network.zig").urlToString(arena, &full_request_url, .{
151-
.fragment = true,
152-
});
153-
const headers = try transfer.req.headers.asHashMap(arena);
154-
// End of duped code
163+
try cdp.intercept_state.put(transfer);
155164

156165
try cdp.sendEvent("Fetch.requestPaused", .{
157166
.requestId = try std.fmt.allocPrint(arena, "INTERCEPT-{d}", .{transfer.id}),
158-
.request = .{
159-
.url = request_url,
160-
.urlFragment = request_fragment,
161-
.method = @tagName(transfer.req.method),
162-
.hasPostData = transfer.req.body != null,
163-
.headers = std.json.ArrayHashMap([]const u8){ .map = headers },
164-
},
167+
.request = network.TransferAsRequestWriter.init(transfer),
165168
.frameId = target_id,
166169
.resourceType = ResourceType.Document, // TODO!
167170
.networkId = try std.fmt.allocPrint(arena, "REQ-{d}", .{transfer.id}),
168171
}, .{ .session_id = session_id });
169172

170173
// Await either continueRequest, failRequest or fulfillRequest
174+
171175
intercept.wait_for_interception.* = true;
176+
page.request_intercepted = true;
172177
}
173178

174179
const HeaderEntry = struct {
@@ -186,46 +191,67 @@ fn continueRequest(cmd: anytype) !void {
186191
headers: ?[]const HeaderEntry = null,
187192
interceptResponse: bool = false,
188193
})) orelse return error.InvalidParams;
189-
if (params.postData != null or params.headers != null or params.interceptResponse) return error.NotYetImplementedParams;
190194

195+
if (params.postData != null or params.headers != null or params.interceptResponse) {
196+
return error.NotYetImplementedParams;
197+
}
198+
199+
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
200+
201+
var intercept_state = &bc.cdp.intercept_state;
191202
const request_id = try idFromRequestId(params.requestId);
192-
const entry = bc.cdp.intercept_state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound;
193-
const transfer = entry.value;
203+
const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound;
194204

195205
// Update the request with the new parameters
196206
if (params.url) |url| {
197-
// The request url must be modified in a way that's not observable by page. So page.url is not updated.
198-
try transfer.updateURL(try bc.cdp.browser.page_arena.allocator().dupeZ(u8, url));
207+
// The request url must be modified in a way that's not observable by page.
208+
// So page.url is not updated.
209+
try transfer.updateURL(try page.arena.dupeZ(u8, url));
199210
}
200211
if (params.method) |method| {
201212
transfer.req.method = std.meta.stringToEnum(Method, method) orelse return error.InvalidParams;
202213
}
203214

204-
log.info(.cdp, "Request continued by intercept", .{ .id = params.requestId });
215+
log.info(.cdp, "Request continued by intercept", .{
216+
.id = params.requestId,
217+
.url = transfer.uri,
218+
});
205219
try bc.cdp.browser.http_client.process(transfer);
206220

221+
if (intercept_state.empty()) {
222+
page.request_intercepted = false;
223+
}
224+
207225
return cmd.sendResult(null, .{});
208226
}
209227

210228
fn failRequest(cmd: anytype) !void {
211229
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
212-
var state = &bc.cdp.intercept_state;
213230
const params = (try cmd.params(struct {
214231
requestId: []const u8, // "INTERCEPT-{d}"
215232
errorReason: ErrorReason,
216233
})) orelse return error.InvalidParams;
217234

235+
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
236+
237+
var intercept_state = &bc.cdp.intercept_state;
218238
const request_id = try idFromRequestId(params.requestId);
219-
const entry = state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound;
220-
// entry.value is the transfer
221-
entry.value.abort();
239+
240+
const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound;
241+
transfer.abort();
242+
243+
if (intercept_state.empty()) {
244+
page.request_intercepted = false;
245+
}
222246

223247
log.info(.cdp, "Request aborted by intercept", .{ .reason = params.errorReason });
224248
return cmd.sendResult(null, .{});
225249
}
226250

227251
// Get u64 from requestId which is formatted as: "INTERCEPT-{d}"
228252
fn idFromRequestId(request_id: []const u8) !u64 {
229-
if (!std.mem.startsWith(u8, request_id, "INTERCEPT-")) return error.InvalidParams;
253+
if (!std.mem.startsWith(u8, request_id, "INTERCEPT-")) {
254+
return error.InvalidParams;
255+
}
230256
return std.fmt.parseInt(u64, request_id[10..], 10) catch return error.InvalidParams;
231257
}

0 commit comments

Comments
 (0)