Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions lib/extractor/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ function layersWithLatestFileModifications(
// if finding a deleted file - trimming to its original file name for excluding it from extractedLayers
// + not adding this file
if (isWhitedOutFile(filename)) {
removedFilesToIgnore.add(filename.replace(/.wh./, ""));
removedFilesToIgnore.add(removeWhiteoutPrefix(filename));
continue;
}
// not adding previously found to be whited out files to extractedLayers
Expand All @@ -248,8 +248,32 @@ function layersWithLatestFileModifications(
return extractedLayers;
}

/**
* check if a file is 'whited out', which is shown by
* prefixing the filename with a .wh.
* https://www.madebymikal.com/interpreting-whiteout-files-in-docker-image-layers
* https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts
*/
export function isWhitedOutFile(filename: string) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this function also need to account for opaque whiteouts: https://github.com/opencontainers/image-spec/blob/main/layer.md#opaque-whiteout?

In other words, check that the filename is not .wh..wh..opq? Looking above, it's not clear our extractor logic even handles opaque whiteouts at all 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think as long as it starts with .wh it should account for it, it looks like out logic checks if a file is or within any whited out file or directory, so if a directory starts with .wh, any files under it should be excluded. I can manually verify though

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I misunderstood how .wh..wh..opq worked, I'll fix this

return filename.match(/.wh./gm);
const lastSlashIndex = filename.lastIndexOf("/");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this assumes unix path separator -- do we need to handle windows too?

Copy link
Contributor Author

@parker-snyk parker-snyk Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just updated to use path.basename so it should account for this now - https://nodejs.org/api/path.html#windows-vs-posix
I didn't know about this function until you mentioned it, so let me know if I'm incorrect


if (lastSlashIndex === -1) {
// it's a file name, not a path
return filename.startsWith(".wh.");
} else {
// it's a path, so check the last part
const filenameToCheck = filename.substring(lastSlashIndex + 1);
return filenameToCheck.startsWith(".wh.");
}
}

/**
* Remove the .wh. prefix from a whiteout file to get the original filename
*/
export function removeWhiteoutPrefix(filename: string): string {
// Replace .wh. that appears at the start or after the last slash,
// and ensure no slashes come after .wh.
return filename.replace(/^(.*\/)?\.wh\.([^\/]*)$/, "$1$2");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you think this should be "+" instead? I guess we expect 0 times to be a bug in the generator (i.e., impossible) but we should still remove it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(The comment applied to the * in [^\/]*)

}

function isBufferType(type: FileContent): type is Buffer {
Expand Down
92 changes: 91 additions & 1 deletion test/lib/extractor/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import { getContentAsString } from "../../../lib/extractor";
import {
getContentAsString,
isWhitedOutFile,
removeWhiteoutPrefix,
} from "../../../lib/extractor";
import { ExtractAction, ExtractedLayers } from "../../../lib/extractor/types";

describe("index", () => {
Expand All @@ -18,3 +22,89 @@ describe("index", () => {
expect(result).toEqual("Hello, world!");
});
});

describe("isWhitedOutFile", () => {
test("should return true for files containing .wh. in their path", () => {
expect(isWhitedOutFile("/etc/.wh.hosts")).toBe(true);
expect(isWhitedOutFile("/var/lib/.wh.data")).toBe(true);
expect(isWhitedOutFile("/.wh.config")).toBe(true);
});

test("should return false for files not containing .wh.", () => {
expect(isWhitedOutFile("/etc/hosts")).toBe(false);
expect(isWhitedOutFile("")).toBe(false);
expect(isWhitedOutFile("/")).toBe(false);
});

test("should return false for similar but different patterns", () => {
// make sure the dots are literal and not match all
expect(isWhitedOutFile("/etc/wh.hosts")).toBe(false);
expect(isWhitedOutFile("/etc/.whosts")).toBe(false);
expect(isWhitedOutFile("/etc/whhosts")).toBe(false);

// dots in wrong places
expect(isWhitedOutFile("/etc/.w.h.hosts")).toBe(false);
expect(isWhitedOutFile("/etc/..wh..hosts")).toBe(false);

// case sensitive
expect(isWhitedOutFile("/etc/.WH.hosts")).toBe(false);
expect(isWhitedOutFile("/etc/.Wh.hosts")).toBe(false);
});

test("should handle .wh. at different positions", () => {
expect(isWhitedOutFile(".wh.start")).toBe(true);
expect(isWhitedOutFile("middle.wh.file")).toBe(false);
expect(isWhitedOutFile("end.wh.")).toBe(false);
expect(isWhitedOutFile("/deeply/nested/path/.wh.present")).toBe(true);
expect(isWhitedOutFile("/the/.wh./in/path/present")).toBe(false);
});
});

describe("removeWhiteoutPrefix", () => {
test("should remove .wh. prefix from filenames without slashes", () => {
expect(removeWhiteoutPrefix(".wh.hosts")).toBe("hosts");
expect(removeWhiteoutPrefix(".wh.data")).toBe("data");
expect(removeWhiteoutPrefix(".wh.config")).toBe("config");
expect(removeWhiteoutPrefix(".wh.")).toBe("");
expect(removeWhiteoutPrefix(".wh.file.txt")).toBe("file.txt");
});

test("should remove .wh. prefix after the last slash in paths", () => {
expect(removeWhiteoutPrefix("/etc/.wh.hosts")).toBe("/etc/hosts");
expect(removeWhiteoutPrefix("/var/lib/.wh.data")).toBe("/var/lib/data");
expect(removeWhiteoutPrefix("/.wh.config")).toBe("/config");
expect(removeWhiteoutPrefix("/deeply/nested/path/.wh.present")).toBe(
"/deeply/nested/path/present",
);
expect(removeWhiteoutPrefix("/path/to/.wh.")).toBe("/path/to/");
});

test("should not modify files that don't have .wh. prefix in the correct position", () => {
expect(removeWhiteoutPrefix("normal.file")).toBe("normal.file");
expect(removeWhiteoutPrefix("/etc/hosts")).toBe("/etc/hosts");
expect(removeWhiteoutPrefix("middle.wh.file")).toBe("middle.wh.file");
expect(removeWhiteoutPrefix("/path/middle.wh.file")).toBe(
"/path/middle.wh.file",
);
expect(removeWhiteoutPrefix(".whfile")).toBe(".whfile");
expect(removeWhiteoutPrefix("/path/.whfile")).toBe("/path/.whfile");
expect(removeWhiteoutPrefix("/xwh.txt")).toBe("/xwh.txt");
});

test("should handle edge cases", () => {
expect(removeWhiteoutPrefix("")).toBe("");
expect(removeWhiteoutPrefix("/")).toBe("/");
expect(removeWhiteoutPrefix("//")).toBe("//");
expect(removeWhiteoutPrefix("/.wh.")).toBe("/");
expect(removeWhiteoutPrefix("//.wh.test")).toBe("//test");
});

test("should not remove .wh. that appears in the middle of paths", () => {
expect(removeWhiteoutPrefix("/the/.wh./in/path/file")).toBe(
"/the/.wh./in/path/file",
);
expect(removeWhiteoutPrefix("/path/.wh.dir/.wh.file")).toBe(
"/path/.wh.dir/file",
);
});
});
79 changes: 64 additions & 15 deletions test/system/application-scans/__snapshots__/node.spec.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -55967,11 +55967,6 @@ Array [
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [],
"nodeId": "[email protected]",
Expand Down Expand Up @@ -56605,6 +56600,20 @@ Array [
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [
Object {
Expand Down Expand Up @@ -56637,6 +56646,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
Expand Down Expand Up @@ -56679,6 +56691,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
Expand Down Expand Up @@ -57106,6 +57121,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
Expand Down Expand Up @@ -57584,6 +57602,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
Expand Down Expand Up @@ -58137,6 +58158,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
Expand Down Expand Up @@ -58246,6 +58270,11 @@ Array [
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [],
"nodeId": "[email protected]",
"pkgId": "[email protected]",
},
Object {
"deps": Array [],
"nodeId": "[email protected]",
Expand Down Expand Up @@ -58289,6 +58318,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Expand Down Expand Up @@ -58316,6 +58348,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Expand Down Expand Up @@ -58913,9 +58948,6 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Expand Down Expand Up @@ -59153,6 +59185,9 @@ Array [
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Object {
"nodeId": "[email protected]",
},
Expand Down Expand Up @@ -60080,13 +60115,6 @@ Array [
"version": "3.0.0",
},
},
Object {
"id": "[email protected]",
"info": Object {
"name": "isexe",
"version": "2.0.0",
},
},
Object {
"id": "[email protected]",
"info": Object {
Expand Down Expand Up @@ -60542,6 +60570,20 @@ Array [
"version": "4.4.13",
},
},
Object {
"id": "[email protected]",
"info": Object {
"name": "isexe",
"version": "2.0.0",
},
},
Object {
"id": "[email protected]",
"info": Object {
"name": "which",
"version": "1.3.1",
},
},
Object {
"id": "[email protected]",
"info": Object {
Expand Down Expand Up @@ -61550,6 +61592,13 @@ Array [
"version": "1.0.1",
},
},
Object {
"id": "[email protected]",
"info": Object {
"name": "which-module",
"version": "2.0.0",
},
},
Object {
"id": "[email protected]",
"info": Object {
Expand Down
2 changes: 1 addition & 1 deletion test/system/application-scans/node.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ describe("node application scans", () => {
const result = await extractContent([getNodeAppFileContentAction], {
path: imageNameAndTag,
});
expect(Object.keys(result.extractedLayers).length).toEqual(608);
expect(Object.keys(result.extractedLayers).length).toEqual(610);
Object.keys(result.extractedLayers).forEach((fileName) => {
expect(
fileName.endsWith("/package.json") ||
Expand Down
Loading