Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions pkg/action/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"sync"

"github.com/chainguard-dev/clog"
"github.com/chainguard-dev/malcontent/pkg/archive"

Check failure on line 20 in pkg/action/scan.go

View workflow job for this annotation

GitHub Actions / golangci-lint

could not import github.com/chainguard-dev/malcontent/pkg/archive (-: # github.com/chainguard-dev/malcontent/pkg/archive
"github.com/chainguard-dev/malcontent/pkg/compile"
"github.com/chainguard-dev/malcontent/pkg/malcontent"
"github.com/chainguard-dev/malcontent/pkg/programkind"
Expand Down Expand Up @@ -494,6 +494,15 @@
var err error
var frs sync.Map

ft, err := programkind.File(archivePath)
if err != nil {
return nil, fmt.Errorf("failed to determine file type: %w", err)
}
if ft != nil && ft.MIME == "application/x-python-joblib" {
logger.Debugf("skipping unsupported archive: %s", archivePath)
return nil, nil
}

tmpRoot, err := archive.ExtractArchiveToTempDir(ctx, archivePath)
if err != nil {
return nil, fmt.Errorf("extract to temp: %w", err)
Expand All @@ -512,20 +521,22 @@
tmpRoot = fmt.Sprintf("/private%s", tmpRoot)
}

extractedPaths, err := findFilesRecursively(ctx, tmpRoot)
if err != nil {
return nil, fmt.Errorf("find: %w", err)
}

for _, extractedFilePath := range extractedPaths {
fr, err := processFile(ctx, c, rfs, extractedFilePath, archivePath, tmpRoot, logger)
if tmpRoot != "" {
extractedPaths, err := findFilesRecursively(ctx, tmpRoot)
if err != nil {
return nil, err
return nil, fmt.Errorf("find: %w", err)
}
if fr != nil {
// Store a clean representation of the archive's scanned file to match single file scanning behavior
clean := strings.TrimPrefix(extractedFilePath, tmpRoot)
frs.Store(clean, fr)

for _, extractedFilePath := range extractedPaths {
fr, err := processFile(ctx, c, rfs, extractedFilePath, archivePath, tmpRoot, logger)
if err != nil {
return nil, err
}
if fr != nil {
// Store a clean representation of the archive's scanned file to match single file scanning behavior
clean := strings.TrimPrefix(extractedFilePath, tmpRoot)
frs.Store(clean, fr)
}
}
}

Expand Down
31 changes: 23 additions & 8 deletions pkg/archive/archive.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package archive

Check failure on line 1 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / test

Please run goimports. diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 4ac4b0d..8cb036c 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -40,8 +40,8 @@ func extractNestedArchive( isArchive = true case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - default: - isArchive = false + default: + isArchive = false } //nolint:nestif // ignore complexity of 8 @@ -58,8 +58,8 @@ func extractNestedArchive( if err != nil { return fmt.Errorf("failed to determine file type: %w", err) } - - switch { + + switch { case ft != nil && ft.MIME == "application/x-upx": extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": @@ -114,8 +114,8 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { if err != nil { return "", fmt.Errorf("failed to determine file type: %w", err) } - - var isArchive bool + + var isArchive bool ft, err := programkind.File(f) if err != nil { return fmt.Errorf("failed to determine file type: %w", err) @@ -124,15 +124,15 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { switch { case ft != nil && ft.MIME == "application/x-upx": isArchive = true - extract = ExtractUPX + extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": isArchive = true - extract = ExtractZlib + extract = ExtractZlib case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - extract = ExtractionMethod(programkind.GetExt(path)) - default: - isArchive = false + extract = ExtractionMethod(programkind.GetExt(path)) + default: + isArchive = false } if !isArchive || extract == nil {

Check failure on line 1 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / check goimports

Please run goimports. diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 4ac4b0d..8cb036c 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -40,8 +40,8 @@ func extractNestedArchive( isArchive = true case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - default: - isArchive = false + default: + isArchive = false } //nolint:nestif // ignore complexity of 8 @@ -58,8 +58,8 @@ func extractNestedArchive( if err != nil { return fmt.Errorf("failed to determine file type: %w", err) } - - switch { + + switch { case ft != nil && ft.MIME == "application/x-upx": extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": @@ -114,8 +114,8 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { if err != nil { return "", fmt.Errorf("failed to determine file type: %w", err) } - - var isArchive bool + + var isArchive bool ft, err := programkind.File(f) if err != nil { return fmt.Errorf("failed to determine file type: %w", err) @@ -124,15 +124,15 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { switch { case ft != nil && ft.MIME == "application/x-upx": isArchive = true - extract = ExtractUPX + extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": isArchive = true - extract = ExtractZlib + extract = ExtractZlib case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - extract = ExtractionMethod(programkind.GetExt(path)) - default: - isArchive = false + extract = ExtractionMethod(programkind.GetExt(path)) + default: + isArchive = false } if !isArchive || extract == nil {

Check failure on line 1 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / check gofmt

Please run gofmt -s. diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 4ac4b0d..8cb036c 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -40,8 +40,8 @@ func extractNestedArchive( isArchive = true case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - default: - isArchive = false + default: + isArchive = false } //nolint:nestif // ignore complexity of 8 @@ -58,8 +58,8 @@ func extractNestedArchive( if err != nil { return fmt.Errorf("failed to determine file type: %w", err) } - - switch { + + switch { case ft != nil && ft.MIME == "application/x-upx": extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": @@ -114,8 +114,8 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { if err != nil { return "", fmt.Errorf("failed to determine file type: %w", err) } - - var isArchive bool + + var isArchive bool ft, err := programkind.File(f) if err != nil { return fmt.Errorf("failed to determine file type: %w", err) @@ -124,15 +124,15 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) { switch { case ft != nil && ft.MIME == "application/x-upx": isArchive = true - extract = ExtractUPX + extract = ExtractUPX case ft != nil && ft.MIME == "application/zlib": isArchive = true - extract = ExtractZlib + extract = ExtractZlib case programkind.ArchiveMap[programkind.GetExt(f)]: isArchive = true - extract = ExtractionMethod(programkind.GetExt(path)) - default: - isArchive = false + extract = ExtractionMethod(programkind.GetExt(path)) + default: + isArchive = false } if !isArchive || extract == nil {

import (
"context"
Expand Down Expand Up @@ -26,19 +26,22 @@
f string,
extracted *sync.Map,
) error {
isArchive := false
var isArchive bool
// zlib-compressed files are also archives
ft, err := programkind.File(f)
if err != nil {
return fmt.Errorf("failed to determine file type: %w", err)
}

switch {
case ft != nil && ft.MIME == "application/x-upx":
isArchive = true
case ft != nil && ft.MIME == "application/zlib":
isArchive = true
case programkind.ArchiveMap[programkind.GetExt(f)]:
isArchive = true
default:
isArchive = false
}

//nolint:nestif // ignore complexity of 8
Expand All @@ -55,7 +58,8 @@
if err != nil {
return fmt.Errorf("failed to determine file type: %w", err)
}
switch {

switch {
case ft != nil && ft.MIME == "application/x-upx":
extract = ExtractUPX
case ft != nil && ft.MIME == "application/zlib":
Expand Down Expand Up @@ -110,17 +114,28 @@
if err != nil {
return "", fmt.Errorf("failed to determine file type: %w", err)
}

var isArchive bool
ft, err := programkind.File(f)

Check failure on line 119 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

no new variables on left side of :=

Check failure on line 119 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

undefined: f

Check failure on line 119 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

no new variables on left side of :=

Check failure on line 119 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

undefined: f
if err != nil {
return fmt.Errorf("failed to determine file type: %w", err)

Check failure on line 121 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

not enough return values

Check failure on line 121 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

not enough return values
}

switch {
case ft != nil && ft.MIME == "application/zlib":
extract = ExtractZlib
case ft != nil && ft.MIME == "application/x-upx":
extract = ExtractUPX
default:
extract = ExtractionMethod(programkind.GetExt(path))
isArchive = true
extract = ExtractUPX
case ft != nil && ft.MIME == "application/zlib":
isArchive = true
extract = ExtractZlib
case programkind.ArchiveMap[programkind.GetExt(f)]:

Check failure on line 131 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

undefined: f) (typecheck)

Check failure on line 131 in pkg/archive/archive.go

View workflow job for this annotation

GitHub Actions / golangci-lint

undefined: f (typecheck)
isArchive = true
extract = ExtractionMethod(programkind.GetExt(path))
default:
isArchive = false
}

if extract == nil {
if !isArchive || extract == nil {
return "", fmt.Errorf("unsupported archive type: %s", path)
}
err = extract(ctx, tmpDir, path)
Expand Down
19 changes: 18 additions & 1 deletion pkg/programkind/programkind.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"os/exec"
"path/filepath"
"regexp"
"slices"
"strings"

"github.com/gabriel-vasile/mimetype"
Expand Down Expand Up @@ -68,6 +69,7 @@ var supportedKind = map[string]string{
"hh": "text/x-h",
"html": "",
"java": "text/x-java",
"joblib": "application/x-python-joblib",
"js": "application/javascript",
"lnk": "application/x-ms-shortcut",
"lua": "text/x-lua",
Expand Down Expand Up @@ -204,7 +206,7 @@ func makeFileType(path string, ext string, mime string) *FileType {

// File detects what kind of program this file might be.
//
//nolint:cyclop // ignore complexity of 38
//nolint:cyclop // ignore complexity of 44
func File(path string) (*FileType, error) {
// Follow symlinks and return cleanly if the target does not exist
_, err := filepath.EvalSymlinks(path)
Expand Down Expand Up @@ -288,6 +290,21 @@ func File(path string) (*FileType, error) {
return Path(".gzip"), nil
case hdr[0] == '\x78' && hdr[1] == '\x5E':
return Path(".Z"), nil
// Capture joblib files that cannot be decompressed externally
// https://joblib.readthedocs.io/en/stable/generated/joblib.dump.html
// Check the header, file extension, and MIME type to be as specific as possible
case hdr[0] == '\x5A' && hdr[1] == '\x46' && hdr[2] == '\x30' && hdr[3] == '\x78':
joblibExts := []string{
".z",
".gz",
".bz2",
".xz",
".lzma",
}

if slices.Contains(joblibExts, GetExt(path)) && mtype.String() == "application/octet-stream" {
return Path(".joblib"), nil
}
}
return nil, nil
}
Expand Down
Loading