@@ -109,6 +109,7 @@ pub const Page = struct {
109109 err : anyerror ,
110110 parsed : void ,
111111 html : parser.Parser ,
112+ text : parser.Parser ,
112113 raw : std .ArrayListUnmanaged (u8 ),
113114 raw_done : []const u8 ,
114115 };
@@ -207,6 +208,14 @@ pub const Page = struct {
207208 return out .writeAll (buf .items );
208209 },
209210 .raw_done = > | data | return out .writeAll (data ),
211+ .text = > {
212+ // handled here rather than with .html: this arm returns directly.
213+ // Write only the children of the first <pre> element found in the document.
214+ const doc = parser .documentHTMLToDocument (self .window .document );
215+ const list = try parser .documentGetElementsByTagName (doc , "pre" );
216+ const pre = try parser .nodeListItem (list , 0 ) orelse return error .InvalidHTML ;
217+ return Dump .writeChildren (pre , .{}, out );
218+ },
210219 .html = > {
211220 // maybe page.wait timed-out, print what we have
212221 log .warn (.http , "incomplete load" , .{ .mode = "html" });
@@ -284,7 +293,7 @@ pub const Page = struct {
284293
285294 while (true ) {
286295 SW : switch (self .mode ) {
287- .pre , .raw = > {
296+ .pre , .raw , .text = > {
288297 if (self .request_intercepted ) {
289298 // the page request was intercepted.
290299
@@ -627,18 +636,27 @@ pub const Page = struct {
627636 break :blk Mime .sniff (data );
628637 } orelse .unknown ;
629638
630- const is_html = mime .isHTML ();
631- log .debug (.http , "navigate first chunk" , .{ .html = is_html , .len = data .len });
639+ log .debug (.http , "navigate first chunk" , .{ .content_type = mime .content_type , .len = data .len });
632640
633- if (is_html ) {
634- self .mode = .{ .html = try parser .Parser .init (mime .charset orelse "UTF-8" ) };
635- } else {
636- self .mode = .{ .raw = .{} };
637- }
641+ self .mode = switch (mime .content_type ) {
642+ .text_html = > .{ .html = try parser .Parser .init (mime .charset orelse "UTF-8" ) },
643+
644+ .application_json ,
645+ .text_javascript ,
646+ .text_css ,
647+ .text_plain ,
648+ = > blk : {
649+ var p = try parser .Parser .init (mime .charset orelse "UTF-8" );
650+ try p .process ("<html><head><meta charset=\" utf-8\" ></head><body><pre>" );
651+ break :blk .{ .text = p };
652+ },
653+
654+ else = > .{ .raw = .{} },
655+ };
638656 }
639657
640658 switch (self .mode ) {
641- .html = > | * p | try p .process (data ),
659+ .html , .text = > | * p | try p .process (data ),
642660 .raw = > | * buf | try buf .appendSlice (self .arena , data ),
643661 .pre = > unreachable ,
644662 .parsed = > unreachable ,
@@ -658,6 +676,13 @@ pub const Page = struct {
658676 self .mode = .{ .raw_done = buf .items };
659677 self .documentIsComplete ();
660678 },
679+ .text = > | * p | {
680+ try p .process ("</pre></body></html>" );
681+ const html_doc = p .html_doc ;
682+ p .deinit (); // don't need the parser anymore
683+ try self .setDocument (html_doc );
684+ self .documentIsComplete ();
685+ },
661686 .html = > | * p | {
662687 const html_doc = p .html_doc ;
663688 p .deinit (); // don't need the parser anymore
@@ -719,7 +744,7 @@ pub const Page = struct {
719744 self .clearTransferArena ();
720745
721746 switch (self .mode ) {
722- .html = > | * p | p .deinit (), // don't need the parser anymore
747+ .html , .text = > | * p | p .deinit (), // don't need the parser anymore
723748 else = > {},
724749 }
725750 self .mode = .{ .err = err };
0 commit comments