Skip to content

Commit 1a4f693

Browse files
authored
feat: add zstd decompression support for OCI archives (#717)
* feat: add zstd decompression support for OCI archives - Add decompressMaybe() to handle gzip, zstd, and uncompressed data - Implement streaming zstd decompression via fzstd library - Fix archive type detection for image identifiers with containerd - Add unit tests for decompressMaybe * chore: pr comments
1 parent 63b600a commit 1a4f693

File tree

7 files changed

+684
-20
lines changed

7 files changed

+684
-20
lines changed

lib/extractor/decompress-maybe.ts

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
import { Decompress as ZstdDecompress } from "fzstd";
2+
import { Transform } from "stream";
3+
import { createGunzip } from "zlib";
4+
5+
/**
6+
* Creates a transform stream that automatically detects and decompresses data based on magic numbers.
7+
*
8+
* Supports three formats:
9+
* - gzip (magic: 1f 8b) - Streamed through Node.js built-in zlib
10+
* - zstd (magic: 28 b5 2f fd) - Streamed through fzstd library
11+
* - uncompressed - Passed through unchanged
12+
*
13+
* Both gzip and zstd use streaming decompression to avoid buffering entire layers in memory.
14+
* This is critical for handling large image layers (multiple GB) without excessive memory usage.
15+
*
16+
* OCI images from containerd may use zstd compression, while older Docker archives use gzip.
17+
* Manifest and config files within OCI archives are typically uncompressed JSON.
18+
*
19+
* Named after the gunzip-maybe library, which only handled gzip detection.
20+
*/
21+
export function decompressMaybe(): Transform {
22+
let headerRead = false;
23+
let compressionType: "gzip" | "zstd" | "none" | null = null;
24+
let gzipStream: Transform | null = null;
25+
let zstdStream: ZstdDecompress | null = null;
26+
const buffer: Buffer[] = [];
27+
28+
const transform = new Transform({
29+
transform(chunk: Buffer, _encoding, callback) {
30+
if (!headerRead) {
31+
buffer.push(chunk);
32+
const combined = Buffer.concat(buffer);
33+
34+
// Check for gzip magic number (1f 8b)
35+
if (
36+
combined.length >= 2 &&
37+
combined[0] === 0x1f &&
38+
combined[1] === 0x8b
39+
) {
40+
compressionType = "gzip";
41+
headerRead = true;
42+
43+
// Setup gzip decompressor
44+
gzipStream = createGunzip();
45+
gzipStream.on("data", (data: Buffer) => transform.push(data));
46+
gzipStream.on("error", (err: Error) => transform.destroy(err));
47+
48+
// Write buffered data
49+
gzipStream.write(combined);
50+
buffer.length = 0;
51+
callback();
52+
}
53+
// Check for zstd magic number (28 b5 2f fd)
54+
else if (
55+
combined.length >= 4 &&
56+
combined[0] === 0x28 &&
57+
combined[1] === 0xb5 &&
58+
combined[2] === 0x2f &&
59+
combined[3] === 0xfd
60+
) {
61+
compressionType = "zstd";
62+
headerRead = true;
63+
64+
// Setup zstd decompressor with streaming API
65+
zstdStream = new ZstdDecompress(
66+
(data: Uint8Array, final?: boolean) => {
67+
transform.push(Buffer.from(data));
68+
},
69+
);
70+
71+
// Write buffered data
72+
try {
73+
zstdStream.push(new Uint8Array(combined), false);
74+
} catch (err) {
75+
callback(
76+
new Error(
77+
`zstd decompression failed: ${
78+
err instanceof Error ? err.message : String(err)
79+
}`,
80+
),
81+
);
82+
return;
83+
}
84+
buffer.length = 0;
85+
callback();
86+
}
87+
// After 8 bytes, assume uncompressed
88+
else if (combined.length >= 8) {
89+
compressionType = "none";
90+
headerRead = true;
91+
92+
// Push buffered data as-is
93+
this.push(combined);
94+
buffer.length = 0;
95+
callback();
96+
} else {
97+
// Need more data
98+
callback();
99+
}
100+
} else {
101+
// Header already read
102+
if (compressionType === "gzip" && gzipStream) {
103+
gzipStream.write(chunk);
104+
callback();
105+
} else if (compressionType === "zstd" && zstdStream) {
106+
try {
107+
zstdStream.push(new Uint8Array(chunk), false);
108+
callback();
109+
} catch (err) {
110+
callback(
111+
new Error(
112+
`zstd decompression failed: ${
113+
err instanceof Error ? err.message : String(err)
114+
}`,
115+
),
116+
);
117+
}
118+
} else {
119+
// No compression
120+
callback(null, chunk);
121+
}
122+
}
123+
},
124+
125+
async flush(callback) {
126+
if (compressionType === "gzip" && gzipStream) {
127+
gzipStream.once("end", () => callback());
128+
gzipStream.end();
129+
} else if (compressionType === "zstd" && zstdStream) {
130+
// Signal end of zstd stream
131+
try {
132+
zstdStream.push(new Uint8Array(0), true);
133+
callback();
134+
} catch (err) {
135+
callback(
136+
new Error(
137+
`zstd decompression failed: ${
138+
err instanceof Error ? err.message : String(err)
139+
}`,
140+
),
141+
);
142+
}
143+
} else if (!headerRead && buffer.length > 0) {
144+
// Stream ended before determining compression, assume uncompressed
145+
this.push(Buffer.concat(buffer));
146+
callback();
147+
} else {
148+
callback();
149+
}
150+
},
151+
});
152+
153+
return transform;
154+
}

lib/extractor/index.ts

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,21 +117,24 @@ export async function extractImageContent(
117117
let archiveContent: ExtractedLayersAndManifest;
118118

119119
if (!extractors.has(imageType)) {
120-
// default to Docker extractor if image type is unknown
121-
imageType = ImageType.DockerArchive;
122-
}
123-
extractor = extractors.get(imageType) as ArchiveExtractor;
120+
// Unknown image type - try all extractors to auto-detect format
121+
[archiveContent, extractor] = await extractArchiveContentFallback(
122+
extractors,
123+
);
124+
} else {
125+
extractor = extractors.get(imageType) as ArchiveExtractor;
124126

125-
try {
126-
archiveContent = await extractor.getLayersAndManifest();
127-
} catch (err) {
128-
if (err instanceof InvalidArchiveError) {
129-
// fallback to the other extractor if layer extraction failed
130-
[archiveContent, extractor] = await extractArchiveContentFallback(
131-
extractors,
132-
);
133-
} else {
134-
throw err;
127+
try {
128+
archiveContent = await extractor.getLayersAndManifest();
129+
} catch (err) {
130+
if (err instanceof InvalidArchiveError) {
131+
// fallback to the other extractor if layer extraction failed
132+
[archiveContent, extractor] = await extractArchiveContentFallback(
133+
extractors,
134+
);
135+
} else {
136+
throw err;
137+
}
135138
}
136139
}
137140

lib/extractor/layer.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
import * as Debug from "debug";
2-
import * as gunzip from "gunzip-maybe";
32
import * as path from "path";
43
import { Readable } from "stream";
54
import { extract, Extract } from "tar-stream";
65
import { isWhitedOutFile } from ".";
76
import { applyCallbacks, isResultEmpty } from "./callbacks";
7+
import { decompressMaybe } from "./decompress-maybe";
88
import { ExtractAction, ExtractedLayers } from "./types";
99

1010
const debug = Debug("snyk");
1111

1212
/**
1313
* Extract key files from the specified TAR stream.
14+
*
15+
* Layer streams may be compressed with gzip, zstd, or uncompressed.
16+
* The decompressMaybe transform handles all three formats automatically.
1417
* @param layerTarStream image layer as a Readable TAR stream. Note: consumes the stream.
1518
* @param extractActions array of pattern, callbacks pairs
1619
* @returns extracted file products
@@ -36,7 +39,6 @@ export async function extractImageLayer(
3639
stream,
3740
headers.size,
3841
);
39-
4042
if (
4143
!isResultEmpty(callbackResult) ||
4244
isWhitedOutFile(absoluteFileName)
@@ -49,6 +51,7 @@ export async function extractImageLayer(
4951
`Exception thrown while applying callbacks during image layer extraction: ${error.message}`,
5052
);
5153
reject(error);
54+
return;
5255
}
5356
} else if (isWhitedOutFile(absoluteFileName)) {
5457
result[absoluteFileName] = {};
@@ -66,6 +69,6 @@ export async function extractImageLayer(
6669

6770
tarExtractor.on("error", (error) => reject(error));
6871

69-
layerTarStream.pipe(gunzip()).pipe(tarExtractor);
72+
layerTarStream.pipe(decompressMaybe()).pipe(tarExtractor);
7073
});
7174
}

lib/extractor/oci-archive/layer.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import * as Debug from "debug";
22
import { createReadStream } from "fs";
3-
import * as gunzip from "gunzip-maybe";
43
import { normalize as normalizePath, sep as pathSeparator } from "path";
54
import { PassThrough } from "stream";
65
import { extract, Extract } from "tar-stream";
76
import { getPlatformFromConfig, InvalidArchiveError } from "..";
87
import { streamToJson } from "../../stream-utils";
98
import { PluginOptions } from "../../types";
9+
import { decompressMaybe } from "../decompress-maybe";
1010
import { extractImageLayer } from "../layer";
1111
import {
1212
ExtractAction,
@@ -30,6 +30,12 @@ const MEDIATYPE_OCI_MANIFEST_LIST_V1 =
3030

3131
/**
3232
* Retrieve the products of files content from the specified oci-archive.
33+
*
34+
* OCI archives contain multiple blobs (configs, manifests, layers). For each blob,
35+
* we attempt to parse it as both JSON (for configs/manifests) and as a compressed layer
36+
* tarball. Most attempts will fail gracefully (a layer isn't valid JSON, a manifest isn't
37+
* a valid tarball), which is expected behavior.
38+
*
3339
* @param ociArchiveFilesystemPath Path to image file saved in oci-archive format.
3440
* @param extractActions Array of pattern-callbacks pairs.
3541
* @param options PluginOptions
@@ -79,7 +85,7 @@ export async function extractArchive(
7985
} else if (isImageIndexFile(manifest)) {
8086
indexFiles[digest] = manifest as OciImageIndex;
8187
} else if (isImageConfigFile(manifest)) {
82-
configs.push(manifest);
88+
configs.push(manifest as ImageConfig);
8389
}
8490
if (layer !== undefined) {
8591
layers[digest] = layer as ExtractedLayers;
@@ -116,7 +122,7 @@ export async function extractArchive(
116122
});
117123

118124
createReadStream(ociArchiveFilesystemPath)
119-
.pipe(gunzip())
125+
.pipe(decompressMaybe())
120126
.pipe(tarExtractor);
121127
});
122128
}

0 commit comments

Comments
 (0)