Skip to content

Commit ec71f8e

Browse files
committed
handle text content type with HTML
For text content types (text/plain, text/css, text/javascript, and also application/json), we create a pseudo-HTML tree with the text value wrapped in a <pre> tag. This allows CDP clients to interact with text content easily.
1 parent bade412 commit ec71f8e

File tree

1 file changed

+35
-10
lines changed

1 file changed

+35
-10
lines changed

src/browser/page.zig

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ pub const Page = struct {
109109
err: anyerror,
110110
parsed: void,
111111
html: parser.Parser,
112+
text: parser.Parser,
112113
raw: std.ArrayListUnmanaged(u8),
113114
raw_done: []const u8,
114115
};
@@ -207,6 +208,14 @@ pub const Page = struct {
207208
return out.writeAll(buf.items);
208209
},
209210
.raw_done => |data| return out.writeAll(data),
211+
.text => {
212+
// text content was wrapped in a pseudo HTML document while loading;
213+
// return the <pre> element from the HTML
214+
const doc = parser.documentHTMLToDocument(self.window.document);
215+
const list = try parser.documentGetElementsByTagName(doc, "pre");
216+
const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML;
217+
return Dump.writeChildren(pre, .{}, out);
218+
},
210219
.html => {
211220
// maybe page.wait timed-out, print what we have
212221
log.warn(.http, "incomplete load", .{ .mode = "html" });
@@ -284,7 +293,7 @@ pub const Page = struct {
284293

285294
while (true) {
286295
SW: switch (self.mode) {
287-
.pre, .raw => {
296+
.pre, .raw, .text => {
288297
if (self.request_intercepted) {
289298
// the page request was intercepted.
290299

@@ -627,18 +636,27 @@ pub const Page = struct {
627636
break :blk Mime.sniff(data);
628637
} orelse .unknown;
629638

630-
const is_html = mime.isHTML();
631-
log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });
639+
log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
632640

633-
if (is_html) {
634-
self.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
635-
} else {
636-
self.mode = .{ .raw = .{} };
637-
}
641+
self.mode = switch (mime.content_type) {
642+
.text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
643+
644+
.application_json,
645+
.text_javascript,
646+
.text_css,
647+
.text_plain,
648+
=> blk: {
649+
var p = try parser.Parser.init(mime.charset orelse "UTF-8");
650+
try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>");
651+
break :blk .{ .text = p };
652+
},
653+
654+
else => .{ .raw = .{} },
655+
};
638656
}
639657

640658
switch (self.mode) {
641-
.html => |*p| try p.process(data),
659+
.html, .text => |*p| try p.process(data),
642660
.raw => |*buf| try buf.appendSlice(self.arena, data),
643661
.pre => unreachable,
644662
.parsed => unreachable,
@@ -658,6 +676,13 @@ pub const Page = struct {
658676
self.mode = .{ .raw_done = buf.items };
659677
self.documentIsComplete();
660678
},
679+
.text => |*p| {
680+
try p.process("</pre></body></html>");
681+
const html_doc = p.html_doc;
682+
p.deinit(); // don't need the parser anymore
683+
try self.setDocument(html_doc);
684+
self.documentIsComplete();
685+
},
661686
.html => |*p| {
662687
const html_doc = p.html_doc;
663688
p.deinit(); // don't need the parser anymore
@@ -719,7 +744,7 @@ pub const Page = struct {
719744
self.clearTransferArena();
720745

721746
switch (self.mode) {
722-
.html => |*p| p.deinit(), // don't need the parser anymore
747+
.html, .text => |*p| p.deinit(), // don't need the parser anymore
723748
else => {},
724749
}
725750
self.mode = .{ .err = err };

0 commit comments

Comments
 (0)