Skip to content

Commit b9f6146

Browse files
committed
Try to sniff the mime type based on the body content
The synchronous body reader now exposes a peek() function that returns the first few bytes of the response body. It returns at least 100 bytes (assuming the body is that large), but may return more. The streaming API, via res.next(), continues to work as-is even if peek() has been called. Introduce Mime.sniff(), which detects a few common types - the ones that we care about right now - from the body content.
1 parent 66ec087 commit b9f6146

File tree

5 files changed

+342
-159
lines changed

5 files changed

+342
-159
lines changed

src/browser/browser.zig

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -435,24 +435,19 @@ pub const Page = struct {
435435

436436
log.info("GET {any} {d}", .{ url, header.status });
437437

438-
const ct = blk: {
439-
break :blk header.get("content-type") orelse {
440-
// no content type in HTTP headers.
441-
// TODO try to sniff mime type from the body.
442-
log.info("no content-type HTTP header", .{});
443-
444-
// Assume it's HTML for now.
445-
break :blk "text/html; charset=utf-8";
446-
};
447-
};
438+
const content_type = header.get("content-type");
448439

449-
log.debug("header content-type: {s}", .{ct});
450-
var mime = try Mime.parse(arena, ct);
440+
const mime: Mime = blk: {
441+
if (content_type) |ct| {
442+
break :blk try Mime.parse(arena, ct);
443+
}
444+
break :blk Mime.sniff(try response.peek());
445+
} orelse .unknown;
451446

452447
if (mime.isHTML()) {
453448
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
454449
} else {
455-
log.info("non-HTML document: {s}", .{ct});
450+
log.info("non-HTML document: {s}", .{content_type orelse "null"});
456451
var arr: std.ArrayListUnmanaged(u8) = .{};
457452
while (try response.next()) |data| {
458453
try arr.appendSlice(arena, try arena.dupe(u8, data));

0 commit comments

Comments
 (0)