Skip to content

Commit 78bfdd4

Browse files
committed
Support gzip compressed content for the synchronous http client
1 parent 7f2506d commit 78bfdd4

File tree

3 files changed

+148
-1
lines changed

3 files changed

+148
-1
lines changed

src/http/client.zig

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ pub const Request = struct {
214214
if (opts.tls_verify_host) |override| {
215215
self._tls_verify_host = override;
216216
}
217+
217218
try self.prepareInitialSend();
218219
return self.doSendSync();
219220
}
@@ -905,6 +906,37 @@ const SyncHandler = struct {
905906
std.debug.assert(result.done or reader.body_reader != null);
906907
std.debug.assert(result.data == null);
907908

909+
// See CompressedReader for an explanation. This isn't great code. Sorry.
910+
if (reader.response.get("content-encoding")) |ce| {
911+
if (std.ascii.eqlIgnoreCase(ce, "gzip") == false) {
912+
log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.uri });
913+
return error.UnsupportedContentEncoding;
914+
}
915+
916+
var compress_reader = CompressedReader{
917+
.over = "",
918+
.inner = &reader,
919+
.done = result.done,
920+
.buffer = state.read_buf,
921+
.data = result.unprocessed,
922+
.connection = connection,
923+
};
924+
var body: std.ArrayListUnmanaged(u8) = .{};
925+
var decompressor = std.compress.gzip.decompressor(compress_reader.reader());
926+
try decompressor.decompress(body.writer(request.arena));
927+
928+
return .{
929+
.header = reader.response,
930+
._done = true,
931+
._request = request,
932+
._peek_buf = body.items,
933+
._peek_len = body.items.len,
934+
._buf = undefined,
935+
._reader = undefined,
936+
._connection = undefined,
937+
};
938+
}
939+
908940
return .{
909941
._buf = buf,
910942
._request = request,
@@ -996,6 +1028,90 @@ const SyncHandler = struct {
9961028
}
9971029
}
9981030
};
1031+
1032+
// We don't ask for encoding, but some providers (CloudFront!!)
// encode anyways. This is an issue for our async-path because Zig's
// decompressors aren't async-friendly - they want to pull data in
// rather than being given data when it's available. Unfortunately
// this is a problem for our own Reader, which is shared by both our
// sync and async handlers, but has an async-ish API. It's hard to
// use our Reader with Zig's decompressors. Given the way our Reader
// is written, this is a problem even for our sync requests. For now, we
// just read the entire body into memory, which makes things manageable.
// Finally, we leverage the existing `peek` logic in the Response to make
// this fully-read content available.
// If you think about it, this CompressedReader is just a fancy "peek" over
// the entire body.
const CompressedReader = struct {
    // True once the underlying body reader has consumed the full response.
    done: bool,

    // Scratch buffer for raw socket reads (owned by the request state).
    buffer: []u8,

    // The shared response/body reader that strips transfer framing
    // (e.g. chunk headers) from raw socket bytes.
    inner: *Reader,

    connection: Connection,

    // Represents data directly from the socket. It hasn't been processed
    // by the body reader. It could, for example, have chunk information in it.
    // Needs to be processed by `inner` before it can be returned.
    data: ?[]u8,

    // Represents data that _was_ processed by the body reader, but couldn't
    // fit in the destination buffer given to read.
    // This adds complexity, but the reality is that we can read more data
    // from the socket than space we have in the given `dest`. Think of
    // this as doing something like a BufferedReader. We _could_ limit
    // our reads to dest.len, but we can overread when initially reading
    // the header/response, and at that point, we don't know anything about
    // this Compression stuff.
    over: []const u8,

    const IOReader = std.io.Reader(*CompressedReader, anyerror, read);

    pub fn reader(self: *CompressedReader) IOReader {
        return .{ .context = self };
    }

    // Fills `dest` with decoded-but-still-compressed body bytes (framing
    // removed by `inner`); the gzip decompressor pulls from this.
    fn read(self: *CompressedReader, dest: []u8) anyerror!usize {
        if (self.over.len > 0) {
            // data from a previous `read` which is ready to go as-is. i.e.
            // it's already been processed by inner (the body reader).
            const l = @min(self.over.len, dest.len);
            @memcpy(dest[0..l], self.over[0..l]);
            self.over = self.over[l..];
            return l;
        }

        // Limit socket reads to dest.len so that, on this path, processed
        // output fits into `dest` and rarely needs to spill into `over`.
        // (Fix: previously this limited slice was computed but the read
        // below used the full self.buffer, leaving the slice dead code.)
        const buffer = self.buffer[0..@min(dest.len, self.buffer.len)];

        while (true) {
            if (try self.processData()) |data| {
                const l = @min(data.len, dest.len);
                @memcpy(dest[0..l], data[0..l]);

                // if we processed more data than fits into dest, we store
                // it in `over` for the next call to `read`
                self.over = data[l..];
                return l;
            }

            if (self.done) {
                // No buffered data and the body reader is finished: EOF.
                return 0;
            }

            const n = try self.connection.read(buffer);
            self.data = buffer[0..n];
        }
    }

    // Runs any pending raw bytes through the body reader. Returns the
    // processed payload (or null when there was nothing pending), and
    // stashes whatever the body reader didn't consume for the next call.
    fn processData(self: *CompressedReader) !?[]u8 {
        const data = self.data orelse return null;
        const result = try self.inner.process(data);

        self.done = result.done;
        self.data = result.unprocessed; // for the next call

        return result.data;
    }
};
9991115
};
10001116

10011117
// Used for reading the response (both the header and the body)
@@ -1576,6 +1692,13 @@ pub const Response = struct {
15761692
}
15771693

15781694
pub fn peek(self: *Response) ![]u8 {
1695+
if (self._peek_len > 0) {
1696+
// Under normal usage, this is only possible when we're dealing
1697+
// with a compressed response (despite not asking for it). We handle
1698+
// these responses by essentially peeking the entire body.
1699+
return self._peek_buf[0..self._peek_len];
1700+
}
1701+
15791702
if (try self.processData()) |data| {
15801703
// We already have some or all of the body. This happens because
15811704
// we always read as much as we can, so getting the header and
@@ -1906,6 +2029,23 @@ test "HttpClient: sync with body" {
19062029
}
19072030
}
19082031

2032+
test "HttpClient: sync with gzip body" {
    // Run twice: the first pass also exercises peek() before next(),
    // the second pass reads the body through next() alone.
    for (0..2) |attempt| {
        var client = try testClient();
        defer client.deinit();

        const uri = try Uri.parse("http://127.0.0.1:9582/http_client/gzip");
        var req = try client.request(.GET, &uri);
        var res = try req.sendSync(.{});

        const use_peek = attempt == 0;
        if (use_peek) {
            try testing.expectEqual("A new browser built for machines\n", try res.peek());
        }
        try testing.expectEqual("A new browser built for machines\n", try res.next());
        try testing.expectEqual("gzip", res.header.get("content-encoding"));
    }
}
2048+
19092049
test "HttpClient: sync tls with body" {
19102050
var arr: std.ArrayListUnmanaged(u8) = .{};
19112051
defer arr.deinit(testing.allocator);

src/main.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,11 @@ fn serveHTTP(address: std.net.Address) !void {
494494
.status = .moved_permanently,
495495
.extra_headers = &.{.{ .name = "LOCATION", .value = "https://127.0.0.1:9581/http_client/body" }},
496496
});
497+
} else if (std.mem.eql(u8, path, "/http_client/gzip")) {
498+
const body = &.{ 0x1f, 0x8b, 0x08, 0x08, 0x01, 0xc6, 0x19, 0x68, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x00, 0x73, 0x54, 0xc8, 0x4b, 0x2d, 0x57, 0x48, 0x2a, 0xca, 0x2f, 0x2f, 0x4e, 0x2d, 0x52, 0x48, 0x2a, 0xcd, 0xcc, 0x29, 0x51, 0x48, 0xcb, 0x2f, 0x52, 0xc8, 0x4d, 0x4c, 0xce, 0xc8, 0xcc, 0x4b, 0x2d, 0xe6, 0x02, 0x00, 0xe7, 0xc3, 0x4b, 0x27, 0x21, 0x00, 0x00, 0x00 };
499+
try request.respond(body, .{
500+
.extra_headers = &.{.{ .name = "Content-Encoding", .value = "gzip" }},
501+
});
497502
} else if (std.mem.eql(u8, path, "/http_client/echo")) {
498503
var headers: std.ArrayListUnmanaged(std.http.Header) = .{};
499504

src/runtime/testing.zig

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ pub const allocator = std.testing.allocator;
2626
// browser.Env or the browser.SessionState
2727
pub fn Runner(comptime State: type, comptime Global: type, comptime types: anytype) type {
2828
const AdjustedTypes = if (Global == void) generate.Tuple(.{ types, DefaultGlobal }) else types;
29-
const Env = js.Env(State, struct {pub const Interfaces = AdjustedTypes;});
29+
const Env = js.Env(State, struct {
30+
pub const Interfaces = AdjustedTypes;
31+
});
3032

3133
return struct {
3234
env: *Env,

0 commit comments

Comments
 (0)