Skip to content

Commit a74f791

Browse files
authored
Merge pull request #893 from lightpanda-io/dump_noscript
Add a --noscript option to "improve" --dump
2 parents 3bc654b + 3906acb commit a74f791

File tree

5 files changed

+36
-15
lines changed

5 files changed

+36
-15
lines changed

src/browser/dom/element.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,13 @@ pub const Element = struct {
110110

111111
pub fn get_innerHTML(self: *parser.Element, page: *Page) ![]const u8 {
112112
var buf = std.ArrayList(u8).init(page.arena);
113-
try dump.writeChildren(parser.elementToNode(self), buf.writer());
113+
try dump.writeChildren(parser.elementToNode(self), .{}, buf.writer());
114114
return buf.items;
115115
}
116116

117117
pub fn get_outerHTML(self: *parser.Element, page: *Page) ![]const u8 {
118118
var buf = std.ArrayList(u8).init(page.arena);
119-
try dump.writeNode(parser.elementToNode(self), buf.writer());
119+
try dump.writeNode(parser.elementToNode(self), .{}, buf.writer());
120120
return buf.items;
121121
}
122122

src/browser/dump.zig

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ const std = @import("std");
2121
const parser = @import("netsurf.zig");
2222
const Walker = @import("dom/walker.zig").WalkerChildren;
2323

24+
pub const Opts = struct {
25+
exclude_scripts: bool = false,
26+
};
27+
2428
// writer must be a std.io.Writer
25-
pub fn writeHTML(doc: *parser.Document, writer: anytype) !void {
29+
pub fn writeHTML(doc: *parser.Document, opts: Opts, writer: anytype) !void {
2630
try writer.writeAll("<!DOCTYPE html>\n");
27-
try writeChildren(parser.documentToNode(doc), writer);
31+
try writeChildren(parser.documentToNode(doc), opts, writer);
2832
try writer.writeAll("\n");
2933
}
3034

@@ -54,10 +58,15 @@ pub fn writeDocType(doc_type: *parser.DocumentType, writer: anytype) !void {
5458
try writer.writeAll(">");
5559
}
5660

57-
pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
61+
pub fn writeNode(node: *parser.Node, opts: Opts, writer: anytype) anyerror!void {
5862
switch (try parser.nodeType(node)) {
5963
.element => {
6064
// open the tag
65+
const tag_type = try parser.elementHTMLGetTagType(@ptrCast(node));
66+
if (tag_type == .script and opts.exclude_scripts) {
67+
return;
68+
}
69+
6170
const tag = try parser.nodeLocalName(node);
6271
try writer.writeAll("<");
6372
try writer.writeAll(tag);
@@ -82,12 +91,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
8291
// void elements can't have any content.
8392
if (try isVoid(parser.nodeToElement(node))) return;
8493

85-
if (try parser.elementHTMLGetTagType(@ptrCast(node)) == .script) {
94+
if (tag_type == .script) {
8695
try writer.writeAll(try parser.nodeTextContent(node) orelse "");
8796
} else {
8897
// write the children
8998
// TODO avoid recursion
90-
try writeChildren(node, writer);
99+
try writeChildren(node, opts, writer);
91100
}
92101

93102
// close the tag
@@ -129,12 +138,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
129138
}
130139

131140
// writer must be a std.io.Writer
132-
pub fn writeChildren(root: *parser.Node, writer: anytype) !void {
141+
pub fn writeChildren(root: *parser.Node, opts: Opts, writer: anytype) !void {
133142
const walker = Walker{};
134143
var next: ?*parser.Node = null;
135144
while (true) {
136145
next = try walker.get_next(root, next) orelse break;
137-
try writeNode(next.?, writer);
146+
try writeNode(next.?, opts, writer);
138147
}
139148
}
140149

@@ -238,6 +247,6 @@ fn testWriteFullHTML(comptime expected: []const u8, src: []const u8) !void {
238247
defer parser.documentHTMLClose(doc_html) catch {};
239248

240249
const doc = parser.documentHTMLToDocument(doc_html);
241-
try writeHTML(doc, buf.writer(testing.allocator));
250+
try writeHTML(doc, .{}, buf.writer(testing.allocator));
242251
try testing.expectEqualStrings(expected, buf.items);
243252
}

src/browser/page.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,15 @@ pub const Page = struct {
142142
}
143143

144144
// dump writes the page content into the given file.
145-
pub fn dump(self: *const Page, out: std.fs.File) !void {
145+
pub fn dump(self: *const Page, opts: Dump.Opts, out: std.fs.File) !void {
146146
if (self.raw_data) |raw_data| {
147147
// raw_data was set if the document was not HTML, dump the data content only.
148148
return try out.writeAll(raw_data);
149149
}
150150

151151
// if the page has a pointer to a document, dumps the HTML.
152152
const doc = parser.documentHTMLToDocument(self.window.document);
153-
try Dump.writeHTML(doc, out);
153+
try Dump.writeHTML(doc, opts, out);
154154
}
155155

156156
pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 {

src/browser/xmlserializer/xmlserializer.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ pub const XMLSerializer = struct {
3636
pub fn _serializeToString(_: *const XMLSerializer, root: *parser.Node, page: *Page) ![]const u8 {
3737
var buf = std.ArrayList(u8).init(page.arena);
3838
switch (try parser.nodeType(root)) {
39-
.document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), buf.writer()),
39+
.document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), .{}, buf.writer()),
4040
.document_type => try dump.writeDocType(@as(*parser.DocumentType, @ptrCast(root)), buf.writer()),
41-
else => try dump.writeNode(root, buf.writer()),
41+
else => try dump.writeNode(root, .{}, buf.writer()),
4242
}
4343
return buf.items;
4444
}

src/main.zig

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ fn run(alloc: Allocator) !void {
134134

135135
// dump
136136
if (opts.dump) {
137-
try page.dump(std.io.getStdOut());
137+
try page.dump(.{ .exclude_scripts = opts.noscript }, std.io.getStdOut());
138138
}
139139
},
140140
else => unreachable,
@@ -212,6 +212,7 @@ const Command = struct {
212212
url: []const u8,
213213
dump: bool = false,
214214
common: Common,
215+
noscript: bool = false,
215216
};
216217

217218
const Common = struct {
@@ -275,6 +276,7 @@ const Command = struct {
275276
\\Options:
276277
\\--dump Dumps document to stdout.
277278
\\ Defaults to false.
279+
\\--noscript Exclude <script> tags in dump. Defaults to false.
278280
\\
279281
++ common_options ++
280282
\\
@@ -352,6 +354,9 @@ fn inferMode(opt: []const u8) ?App.RunMode {
352354
if (std.mem.eql(u8, opt, "--dump")) {
353355
return .fetch;
354356
}
357+
if (std.mem.eql(u8, opt, "--noscript")) {
358+
return .fetch;
359+
}
355360
if (std.mem.startsWith(u8, opt, "--") == false) {
356361
return .fetch;
357362
}
@@ -437,6 +442,7 @@ fn parseFetchArgs(
437442
args: *std.process.ArgIterator,
438443
) !Command.Fetch {
439444
var dump: bool = false;
445+
var noscript: bool = false;
440446
var url: ?[]const u8 = null;
441447
var common: Command.Common = .{};
442448

@@ -446,6 +452,11 @@ fn parseFetchArgs(
446452
continue;
447453
}
448454

455+
if (std.mem.eql(u8, "--noscript", opt)) {
456+
noscript = true;
457+
continue;
458+
}
459+
449460
if (try parseCommonArg(allocator, opt, args, &common)) {
450461
continue;
451462
}
@@ -471,6 +482,7 @@ fn parseFetchArgs(
471482
.url = url.?,
472483
.dump = dump,
473484
.common = common,
485+
.noscript = noscript,
474486
};
475487
}
476488

0 commit comments

Comments
 (0)