Skip to content

Commit be9e953

Browse files
committed
Add CDP Node Registry
This expands on the existing CDP node work used in DOM.search. It introduces a node registry that tracks all nodes returned to the client and provides lookups to get a node from an Id or a *parser.Node. Eventually, the goal is to have the Registry emit the DOM.setChildNodes event whenever necessary, as well as support many of the missing DOM actions. Added tests to the existing search handlers. Reworked search a little bit to avoid some unnecessary allocations and to hook it into the registry. The generated Node is currently incomplete: the parentId and the children are missing. Also, we still need to associate the v8 ObjectId with the node. Finally, I moved all action handlers into a nested "domain" folder.
1 parent 82e67b7 commit be9e953

19 files changed

+663
-325
lines changed

src/cdp/Node.zig

Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
2+
//
3+
// Francis Bouvier <[email protected]>
4+
// Pierre Tachoire <[email protected]>
5+
//
6+
// This program is free software: you can redistribute it and/or modify
7+
// it under the terms of the GNU Affero General Public License as
8+
// published by the Free Software Foundation, either version 3 of the
9+
// License, or (at your option) any later version.
10+
//
11+
// This program is distributed in the hope that it will be useful,
12+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
// GNU Affero General Public License for more details.
15+
//
16+
// You should have received a copy of the GNU Affero General Public License
17+
// along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
19+
const std = @import("std");
20+
const parser = @import("netsurf");
21+
const Allocator = std.mem.Allocator;
22+
23+
pub const Id = u32;
24+
25+
const Node = @This();
26+
27+
// --- identity ---------------------------------------------------------------

/// Id under which this node is registered; serialized as "nodeId".
id: Id,
/// Serialized as "backendNodeId". `Registry.register` currently stores the
/// same value as `id` here.
backend_node_id: Id,
/// Serialized as "parentId". `Registry.register` leaves this null for now.
parent_id: ?Id = null,
/// The parser node this entry wraps; it is not part of the JSON output.
_node: *parser.Node,

// --- description, filled from the parser node at registration ----------------

/// Integer value of the parser's node type enum.
node_type: u32,
/// Result of `parser.nodeName`.
node_name: []const u8,
/// Result of `parser.nodeLocalName`.
local_name: []const u8,
/// Result of `parser.nodeValue`, or "" when the parser reports none.
node_value: []const u8,

// --- tree ---------------------------------------------------------------------

/// Length of the parser's child node list at registration time.
child_node_count: u32,
/// Child entries; `Registry.register` currently always stores an empty slice.
children: []const *Node,

// --- document-level attributes (placeholders in `Registry.register`) ---------

document_url: ?[]const u8,
base_url: ?[]const u8,
xml_version: []const u8,
compatibility_mode: CompatibilityMode,
is_scrollable: bool,
42+
43+
/// Value written under the "compatibilityMode" key by `jsonStringify`.
/// Only the no-quirks variant is declared.
const CompatibilityMode = enum { NoQuirksMode };
46+
47+
/// Writes this node to `writer` as a single JSON object.
///
/// Every key is always emitted, in the order of the table below; each value
/// is handed to `writer.write` as stored in the corresponding struct field.
/// `_node` is never written. Any error from the writer is returned to the
/// caller.
pub fn jsonStringify(self: *const Node, writer: anytype) !void {
    const Entry = struct {
        /// Key as it appears in the JSON output.
        key: []const u8,
        /// Name of the struct field whose value is written under `key`.
        field: []const u8,
    };

    // Output order is the order of this table.
    const entries = [_]Entry{
        .{ .key = "nodeId", .field = "id" },
        .{ .key = "parentId", .field = "parent_id" },
        .{ .key = "backendNodeId", .field = "backend_node_id" },
        .{ .key = "nodeType", .field = "node_type" },
        .{ .key = "nodeName", .field = "node_name" },
        .{ .key = "localName", .field = "local_name" },
        .{ .key = "nodeValue", .field = "node_value" },
        .{ .key = "childNodeCount", .field = "child_node_count" },
        .{ .key = "children", .field = "children" },
        .{ .key = "documentURL", .field = "document_url" },
        .{ .key = "baseURL", .field = "base_url" },
        .{ .key = "xmlVersion", .field = "xml_version" },
        .{ .key = "compatibilityMode", .field = "compatibility_mode" },
        .{ .key = "isScrollable", .field = "is_scrollable" },
    };

    try writer.beginObject();
    // Unrolled at compile time so each `@field` lookup uses a comptime name.
    inline for (entries) |entry| {
        try writer.objectField(entry.key);
        try writer.write(@field(self.*, entry.field));
    }
    try writer.endObject();
}
92+
93+
// Every node that is sent to the client gets registered here so it can be
// found again later. Two indexes are kept side by side: Id -> *Node and
// *parser.Node -> *Node.
pub const Registry = struct {
    // Id that the next newly registered node will receive.
    node_id: u32,
    // Backs both lookup tables.
    allocator: Allocator,
    // Owns the memory of every registered Node.
    node_pool: std.heap.MemoryPool(Node),
    lookup_by_id: std.AutoHashMapUnmanaged(Id, *Node),
    lookup_by_node: std.HashMapUnmanaged(*parser.Node, *Node, NodeContext, std.hash_map.default_max_load_percentage),

    // Creates an empty registry whose first node will get id 0.
    pub fn init(allocator: Allocator) Registry {
        return Registry{
            .allocator = allocator,
            .node_id = 0,
            .node_pool = std.heap.MemoryPool(Node).init(allocator),
            .lookup_by_id = .{},
            .lookup_by_node = .{},
        };
    }

    // Frees both lookup tables and the node pool.
    pub fn deinit(self: *Registry) void {
        self.lookup_by_node.deinit(self.allocator);
        self.lookup_by_id.deinit(self.allocator);
        self.node_pool.deinit();
    }

    // Forgets every registered node while keeping the capacity of the tables
    // and of the pool. Note that `node_id` is left untouched, so ids keep
    // counting up across resets.
    pub fn reset(self: *Registry) void {
        _ = self.node_pool.reset(.{ .retain_capacity = {} });
        self.lookup_by_node.clearRetainingCapacity();
        self.lookup_by_id.clearRetainingCapacity();
    }

    // Returns the Node wrapping `n`, creating and indexing it on first sight.
    // Registering the same parser node again returns the existing Node and
    // does not consume a new id. Allocation and parser errors are returned
    // to the caller.
    pub fn register(self: *Registry, n: *parser.Node) !*Node {
        const slot = try self.lookup_by_node.getOrPut(self.allocator, n);
        if (slot.found_existing) return slot.value_ptr.*;

        // The slot now exists without a value. A failure below most likely
        // ends the whole browser context anyway, but remove the half-made
        // entry so the table stays consistent.
        errdefer _ = self.lookup_by_node.remove(n);

        const child_list = try parser.nodeGetChildNodes(n);
        const child_count = try parser.nodeListLength(child_list);

        const new_id = self.node_id;
        // Runs on every exit from this point on, so the id is consumed even
        // when one of the steps below fails.
        defer self.node_id = new_id + 1;

        const entry = try self.node_pool.create();
        errdefer self.node_pool.destroy(entry);

        const name = try parser.nodeName(n);
        const local_name = try parser.nodeLocalName(n);
        const value: []const u8 = try parser.nodeValue(n) orelse "";
        const kind = try parser.nodeType(n);

        entry.* = .{
            ._node = n,
            .id = new_id,
            // TODO: resolve through parser.nodeParentNode(n).
            .parent_id = null,
            // Mirrors the node id for now (open question in the original).
            .backend_node_id = new_id,
            .node_name = name,
            .local_name = local_name,
            .node_value = value,
            .node_type = @intFromEnum(kind),
            .child_node_count = child_count,
            // TODO: populate the children.
            .children = &.{},
            .document_url = null,
            .base_url = null,
            .xml_version = "",
            .compatibility_mode = .NoQuirksMode,
            .is_scrollable = false,
        };

        slot.value_ptr.* = entry;
        try self.lookup_by_id.putNoClobber(self.allocator, new_id, entry);
        return entry;
    }
};
172+
173+
// Hash-map context for tables keyed on *parser.Node. Keys are hashed and
// compared by address only; the pointed-to node is never read.
const NodeContext = struct {
    // Wyhash (seed 0) over the bytes of the pointer's address.
    pub fn hash(_: NodeContext, n: *parser.Node) u64 {
        const address: usize = @intFromPtr(n);
        return std.hash.Wyhash.hash(0, std.mem.asBytes(&address));
    }

    // Two keys are equal exactly when they are the same pointer.
    pub fn eql(_: NodeContext, a: *parser.Node, b: *parser.Node) bool {
        return a == b;
    }
};
182+
183+
// A search goes through three steps:
//   1 - DOM.performSearch
//   2 - DOM.getSearchResults
//   3 - DOM.discardSearchResults
//
// Several searches can be active at once for one browser context:
// performSearch may be called repeatedly before getSearchResults or
// discardSearchResults is. The active searches live in the browser context's
// node_search_list, which is a Search.List. Few searches are expected to be
// active at a time (mostly just one), so a linear scan over a list is enough.
pub const Search = struct {
    // Identifier of the search, a decimal rendering of its sequence number.
    name: []const u8,
    // Registry ids of the matched nodes, in the order they were given.
    node_ids: []const Id,

    pub const List = struct {
        // Every node of every search is registered here.
        registry: *Registry,
        // Sequence number of the next search to be created.
        search_id: u16 = 0,
        // Owns the names, the id slices and the `searches` storage.
        arena: std.heap.ArenaAllocator,
        searches: std.ArrayListUnmanaged(Search) = .{},

        // Creates an empty list; `allocator` backs the internal arena.
        pub fn init(allocator: Allocator, registry: *Registry) List {
            return List{
                .arena = std.heap.ArenaAllocator.init(allocator),
                .registry = registry,
            };
        }

        // Releases everything the arena holds.
        pub fn deinit(self: *List) void {
            self.arena.deinit();
        }

        // Drops all searches and restarts the numbering at 0. The list is
        // replaced by an empty one before the arena that backed it is reset.
        pub fn reset(self: *List) void {
            self.searches = .{};
            self.search_id = 0;
            _ = self.arena.reset(.{ .retain_with_limit = 4096 });
        }

        // Registers every node in `nodes`, records a new search holding the
        // resulting ids and returns a copy of it. The sequence number
        // advances (wrapping) on every exit, including error exits.
        pub fn create(self: *List, nodes: []const *parser.Node) !Search {
            const current = self.search_id;
            defer self.search_id = current +% 1;

            const allocator = self.arena.allocator();

            // The first ten names are static strings, which avoids an
            // allocation in the common case.
            const single_digit = [_][]const u8{ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
            const name: []const u8 = if (current < single_digit.len)
                single_digit[current]
            else
                try std.fmt.allocPrint(allocator, "{d}", .{current});

            const ids = try allocator.alloc(Id, nodes.len);
            for (nodes, 0..) |raw, i| {
                const registered = try self.registry.register(raw);
                ids[i] = registered.id;
            }

            const result: Search = .{ .name = name, .node_ids = ids };
            try self.searches.append(allocator, result);
            return result;
        }

        // Removes the first search called `name`; does nothing when there is
        // no such search. The last entry is swapped into the freed position.
        pub fn remove(self: *List, name: []const u8) void {
            var i: usize = 0;
            while (i < self.searches.items.len) : (i += 1) {
                if (std.mem.eql(u8, self.searches.items[i].name, name)) {
                    _ = self.searches.swapRemove(i);
                    return;
                }
            }
        }

        // Returns a copy of the first search called `name`, or null.
        pub fn get(self: *const List, name: []const u8) ?Search {
            for (self.searches.items) |candidate| {
                if (!std.mem.eql(u8, candidate.name, name)) continue;
                return candidate;
            }
            return null;
        }
    };
};
273+
274+
const testing = @import("testing.zig");
275+
test "CDP Node: Registry register" {
    var registry = Registry.init(testing.allocator);
    defer registry.deinit();

    // A fresh registry has nothing in either lookup table.
    try testing.expectEqual(0, registry.lookup_by_id.count());
    try testing.expectEqual(0, registry.lookup_by_node.count());

    var doc = try testing.Document.init("<a id=a1>link1</a><div id=d2><p>other</p></div>");
    defer doc.deinit();

    {
        // First registration: the <a> element is given id 0.
        const raw = (try doc.querySelector("#a1")).?;
        const registered = try registry.register(raw);

        // Both lookup tables resolve to the very same entry.
        const by_id = registry.lookup_by_id.get(0).?;
        const by_node = registry.lookup_by_node.get(registered._node).?;
        try testing.expectEqual(registered, by_id);
        try testing.expectEqual(registered, by_node);
        try testing.expectEqual(raw, registered._node);

        // Identity.
        try testing.expectEqual(0, registered.id);
        try testing.expectEqual(0, registered.backend_node_id);
        try testing.expectEqual(null, registered.parent_id);

        // Description taken from the parser node.
        try testing.expectEqual(1, registered.node_type);
        try testing.expectEqual("A", registered.node_name);
        try testing.expectEqual("a", registered.local_name);
        try testing.expectEqual("", registered.node_value);

        // One child is counted, but children are not materialized yet.
        try testing.expectEqual(1, registered.child_node_count);
        try testing.expectEqual(0, registered.children.len);

        // Placeholder values.
        try testing.expectEqual(null, registered.document_url);
        try testing.expectEqual(null, registered.base_url);
        try testing.expectEqual("", registered.xml_version);
        try testing.expectEqual(.NoQuirksMode, registered.compatibility_mode);
        try testing.expectEqual(false, registered.is_scrollable);
    }

    {
        // Second registration: the nested <p> element is given id 1.
        const raw = (try doc.querySelector("p")).?;
        const registered = try registry.register(raw);

        // Both lookup tables resolve to the very same entry.
        const by_id = registry.lookup_by_id.get(1).?;
        const by_node = registry.lookup_by_node.get(registered._node).?;
        try testing.expectEqual(registered, by_id);
        try testing.expectEqual(registered, by_node);
        try testing.expectEqual(raw, registered._node);

        // Identity.
        try testing.expectEqual(1, registered.id);
        try testing.expectEqual(1, registered.backend_node_id);
        try testing.expectEqual(null, registered.parent_id);

        // Description taken from the parser node.
        try testing.expectEqual(1, registered.node_type);
        try testing.expectEqual("P", registered.node_name);
        try testing.expectEqual("p", registered.local_name);
        try testing.expectEqual("", registered.node_value);

        // One child is counted, but children are not materialized yet.
        try testing.expectEqual(1, registered.child_node_count);
        try testing.expectEqual(0, registered.children.len);

        // Placeholder values.
        try testing.expectEqual(null, registered.document_url);
        try testing.expectEqual(null, registered.base_url);
        try testing.expectEqual("", registered.xml_version);
        try testing.expectEqual(.NoQuirksMode, registered.compatibility_mode);
        try testing.expectEqual(false, registered.is_scrollable);
    }
}
335+
336+
test "CDP Node: search list" {
    var registry = Registry.init(testing.allocator);
    defer registry.deinit();

    var list = Search.List.init(testing.allocator, &registry);
    defer list.deinit();

    {
        // On an empty list, remove is a no-op and get finds nothing.
        list.remove("0");
        try testing.expectEqual(null, list.get("0"));
    }

    {
        // A search over no nodes is still recorded, under the name "0".
        const created = try list.create(&.{});
        try testing.expectEqual("0", created.name);
        try testing.expectEqual(0, created.node_ids.len);

        const fetched = list.get("0").?;
        try testing.expectEqual("0", fetched.name);
        try testing.expectEqual(0, fetched.node_ids.len);

        // Once removed it can no longer be fetched.
        list.remove("0");
        try testing.expectEqual(null, list.get("0"));
    }

    {
        var doc = try testing.Document.init("<a id=a1></a><a id=a2></a>");
        defer doc.deinit();

        // Both anchors are new to the registry and get ids 0 and 1.
        const both = try list.create(try doc.querySelectorAll("a"));
        try testing.expectEqual("1", both.name);
        try testing.expectEqualSlices(u32, &.{ 0, 1 }, both.node_ids);

        try testing.expectEqual(2, registry.lookup_by_id.count());
        try testing.expectEqual(2, registry.lookup_by_node.count());

        // Searching for already-registered nodes reuses their ids ...
        const first_only = try list.create(try doc.querySelectorAll("#a1"));
        try testing.expectEqual("2", first_only.name);
        try testing.expectEqualSlices(u32, &.{0}, first_only.node_ids);

        const second_only = try list.create(try doc.querySelectorAll("#a2"));
        try testing.expectEqual("3", second_only.name);
        try testing.expectEqualSlices(u32, &.{1}, second_only.node_ids);

        // ... and therefore does not grow the registry.
        try testing.expectEqual(2, registry.lookup_by_id.count());
        try testing.expectEqual(2, registry.lookup_by_node.count());
    }
}

0 commit comments

Comments
 (0)