Skip to content

Commit f50c647

Browse files
gooncreeper and andrewrk
authored and committed
add deflate compression, simplify decompression
Implements deflate compression from scratch. A history window is kept in the writer's buffer for matching and a chained hash table is used to find matches. Tokens are accumulated until a threshold is reached and then outputted as a block. Flush is used to indicate end of stream.

Additionally, two other deflate writers are provided:
* `Raw` writes only in store blocks (the uncompressed bytes). It utilizes data vectors to efficiently send block headers and data.
* `Huffman` only performs Huffman compression on data and no matching.

The above are also able to take advantage of writer semantics since they do not need to keep a history.

Literal and distance code parameters in `token` have also been reworked. Their parameters are now derived mathematically; however, the more expensive ones are still obtained through a lookup table (except on ReleaseSmall).

Decompression bit reading has been greatly simplified, taking advantage of the ability to peek on the underlying reader. Additionally, a few bugs with limit handling have been fixed.
1 parent e79a00a commit f50c647

File tree

8 files changed

+2930
-2033
lines changed

8 files changed

+2930
-2033
lines changed

lib/std/compress/flate.zig

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
const std = @import("../std.zig");
22

3-
/// When decompressing, the output buffer is used as the history window, so
4-
/// less than this may result in failure to decompress streams that were
5-
/// compressed with a larger window.
3+
/// When compressing and decompressing, the provided buffer is used as the
4+
/// history window, so it must be at least this size.
65
pub const max_window_len = history_len * 2;
76

87
pub const history_len = 32768;
@@ -15,10 +14,6 @@ pub const Compress = @import("flate/Compress.zig");
1514
/// produces the original full-size data.
1615
pub const Decompress = @import("flate/Decompress.zig");
1716

18-
/// Compression without Lempel-Ziv match searching. Faster compression, less
19-
/// memory requirements but bigger compressed sizes.
20-
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
21-
2217
/// Container of the deflate bit stream body. Container adds header before
2318
/// deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
2419
/// no footer, raw bit stream).
@@ -112,37 +107,32 @@ pub const Container = enum {
112107
switch (h.*) {
113108
.raw => {},
114109
.gzip => |*gzip| {
115-
gzip.update(buf);
116-
gzip.count +%= buf.len;
110+
gzip.crc.update(buf);
111+
gzip.count +%= @truncate(buf.len);
117112
},
118113
.zlib => |*zlib| {
119114
zlib.update(buf);
120115
},
121-
inline .gzip, .zlib => |*x| x.update(buf),
122116
}
123117
}
124118

125119
pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
126-
var bits: [4]u8 = undefined;
127120
switch (hasher.*) {
128121
.gzip => |*gzip| {
129122
// GZIP 8 bytes footer
130123
// - 4 bytes, CRC32 (CRC-32)
131-
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
132-
std.mem.writeInt(u32, &bits, gzip.final(), .little);
133-
try writer.writeAll(&bits);
134-
135-
std.mem.writeInt(u32, &bits, gzip.bytes_read, .little);
136-
try writer.writeAll(&bits);
124+
// - 4 bytes, ISIZE (Input SIZE) - size of the original
125+
// (uncompressed) input data modulo 2^32
126+
try writer.writeInt(u32, gzip.crc.final(), .little);
127+
try writer.writeInt(u32, gzip.count, .little);
137128
},
138129
.zlib => |*zlib| {
139130
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
140131
// 4 bytes of ADLER32 (Adler-32 checksum)
141132
// Checksum value of the uncompressed data (excluding any
142133
// dictionary data) computed according to Adler-32
143134
// algorithm.
144-
std.mem.writeInt(u32, &bits, zlib.final, .big);
145-
try writer.writeAll(&bits);
135+
try writer.writeInt(u32, zlib.adler, .big);
146136
},
147137
.raw => {},
148138
}
@@ -174,7 +164,6 @@ pub const Container = enum {
174164
};
175165

176166
test {
177-
_ = HuffmanEncoder;
178167
_ = Compress;
179168
_ = Decompress;
180169
}

0 commit comments

Comments
 (0)