Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
fetch-tags: true
repository: virusTotal/yara-x
path: yara-x
ref: refs/tags/v1.8.0
ref: refs/tags/v1.9.0
- name: Install Rust for yara-x-capi
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9
with:
Expand Down
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

SAMPLES_REPO ?= chainguard-dev/malcontent-samples
SAMPLES_COMMIT ?= f948cfd0f9d2a35a2452fe43ea4d094979652103
YARA_X_REPO ?= virusTotal/yara-x
YARA_X_COMMIT ?= dcde4cba78aa122d0a1bbf293d85b0155bcba9b7

# BEGIN: lint-install ../malcontent
# http://github.com/tinkerbell/lint-install
Expand Down Expand Up @@ -52,17 +50,19 @@ $(GOLANGCI_LINT_BIN):
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(LINT_ROOT)/out/linters $(GOLANGCI_LINT_VERSION)
mv $(LINT_ROOT)/out/linters/golangci-lint $@

YARA_X_VERSION ?= v1.8.0
YARA_X_REPO ?= virusTotal/yara-x
YARA_X_VERSION ?= v1.9.0
YARA_X_COMMIT ?= 01ef5e2bc57c112448682ce6a5e6b66c4da6d6c9
YARA_X_SHA :=
ifeq ($(LINT_OS),Darwin)
ifeq ($(shell uname -m),arm64)
LINT_ARCH = aarch64
YARA_X_SHA = d8e485f6ac302a8e9ff5aba43822acebc4e5df92bc22b2fc215e740258b6adf6
YARA_X_SHA = 7b9b4d7efd762fd3d64f08fd54b16762cda49ed7c1614a329ba03def420f07c6
else
YARA_X_SHA = ac8b16726d5ed484e5ba8d383ba6fde8b3cff7f05378844c461e23164991910c
YARA_X_SHA = c47e38788a17296f7f558921657730fea78c0d5458c9379530ba9bea1d129341
endif
else
YARA_X_SHA = a80fe18143c8da080f632ecffc3fe88d5c3c6622217423f61ee4e3add01d17ec
YARA_X_SHA = 8a3a706ede8abaffbd899bfb7f009ab650502a834ead790d3596c1e0f56a0180
endif
YARA_X_BIN := $(LINT_ROOT)/out/linters/yr-$(YARA_X_VERSION)-$(LINT_ARCH)
$(YARA_X_BIN):
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.24.0
toolchain go1.24.1

require (
github.com/VirusTotal/yara-x/go v1.8.0
github.com/VirusTotal/yara-x/go v1.9.0
github.com/agext/levenshtein v1.2.3
github.com/cavaliergopher/cpio v1.0.1
github.com/cavaliergopher/rpm v1.3.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/VirusTotal/yara-x/go v1.8.0 h1:fpSYlJWBryXO51TT1plRvwiOr6rvCtfy9XSy/y96rjQ=
github.com/VirusTotal/yara-x/go v1.8.0/go.mod h1:lgXP/nkYX349MVowrtTtU5hzMdCOWQLv3+wKll9+0F8=
github.com/VirusTotal/yara-x/go v1.9.0 h1:kuB9kH4sfc0+AwR54Kk6gI5XInbk1O+QeZU9DgOrTAU=
github.com/VirusTotal/yara-x/go v1.9.0/go.mod h1:lgXP/nkYX349MVowrtTtU5hzMdCOWQLv3+wKll9+0F8=
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
Expand Down
89 changes: 31 additions & 58 deletions pkg/action/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"errors"
"fmt"
"io"
"io/fs"
"log/slog"
"os"
Expand All @@ -17,9 +18,6 @@ import (
"strings"
"sync"
"sync/atomic"
"syscall"

"github.com/minio/sha256-simd"

"github.com/chainguard-dev/clog"
"github.com/chainguard-dev/malcontent/pkg/archive"
Expand All @@ -29,6 +27,7 @@ import (
"github.com/chainguard-dev/malcontent/pkg/programkind"
"github.com/chainguard-dev/malcontent/pkg/render"
"github.com/chainguard-dev/malcontent/pkg/report"
"github.com/minio/sha256-simd"
"golang.org/x/sync/errgroup"

yarax "github.com/VirusTotal/yara-x/go"
Expand All @@ -39,55 +38,17 @@ func interactive(c malcontent.Config) bool {
}

var (
// compiledRuleCache is a cache of previously compiled rules.
compiledRuleCache atomic.Pointer[yarax.Rules]
// compileOnce ensures that we compile rules only once even across threads.
compileOnce sync.Once
compiledRuleCache atomic.Pointer[yarax.Rules] // compiledRuleCache is a cache of previously compiled rules.
compileOnce sync.Once // compileOnce ensures that we compile rules only once even across threads.
// ErrMatchedCondition is a sentinel error carrying the message "matched exit criteria".
// NOTE(review): presumably signals that a scan stopped on an exit condition — confirm against callers.
ErrMatchedCondition = errors.New("matched exit criteria")
// initializeOnce ensures that the file and scanner pools are only initialized once.
initializeOnce sync.Once
// scannerPool holds the reusable yara-x scanners; it is created lazily on the first scan.
scannerPool *pool.ScannerPool
// maxMmapSize is the largest number of bytes (1 << 31, i.e. 2 GiB) scanned from a single file;
// files larger than this are truncated to this size before scanning.
maxMmapSize int64 = 1 << 31
initReadPool sync.Once // initReadPool ensures that the bytes read pool is only initialized once.
initScannerPool sync.Once // initScannerPool ensures that the scanner pool is only initialized once.
maxBytes int64 = 1 << 32 // 4 GiB; upper bound on the bytes read from a single file into memory.
readBuffer int64 = 64 * 1024 // 64 KiB; size of the copy buffer requested from readPool.
// readPool supplies reusable copy buffers for reading file contents; it is created lazily on the first scan.
readPool *pool.BufferPool
// scannerPool holds the reusable yara-x scanners; it is created lazily on the first scan.
scannerPool *pool.ScannerPool
)

// scanFD scans the first size bytes of the file behind fd by mapping them
// read-only into memory and handing the mapping to the yara-x scanner.
//
// It returns, in order:
//   - a heap copy of the mapped bytes; the mapping is released before scanFD
//     returns, so report generation needs its own copy for match string extraction,
//   - the yara-x scan results,
//   - the hex-encoded SHA-256 checksum of the mapped bytes,
//   - an error if fstat, mmap, or the scan fails.
//
// On error every other return value is zero. A failure to unmap the memory is
// logged through logger and is not returned to the caller.
func scanFD(scanner *yarax.Scanner, fd uintptr, size int64, logger *clog.Logger) ([]byte, *yarax.ScanResults, string, error) {
	// Only the error from Fstat is checked; the stat result itself is not used.
	var stat syscall.Stat_t
	if err := syscall.Fstat(int(fd), &stat); err != nil {
		return nil, nil, "", fmt.Errorf("fstat failed: %w", err)
	}

	data, err := syscall.Mmap(int(fd), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
	if err != nil {
		return nil, nil, "", fmt.Errorf("mmap failed: %w", err)
	}

	defer func() {
		if unmapErr := syscall.Munmap(data); unmapErr != nil {
			logger.Error("failed to unmap memory", "error", unmapErr)
		}
	}()

	h := sha256.New()
	// hash.Hash.Write is documented to never return an error, so it is safe to ignore.
	_, _ = h.Write(data)
	checksum := fmt.Sprintf("%x", h.Sum(nil))

	mrs, err := scanner.Scan(data)
	if err != nil {
		return nil, nil, "", err
	}

	// Copy the contents only after a successful scan: the mapping disappears when
	// the deferred Munmap runs, and a failed scan has no use for the (potentially
	// very large) copy.
	fc := bytes.Clone(data)

	return fc, mrs, checksum, nil
}

// scanSinglePath YARA scans a single path and converts it to a fileReport.
func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleFS []fs.FS, absPath string, archiveRoot string) (*malcontent.FileReport, error) {
if ctx.Err() != nil {
Expand All @@ -103,7 +64,6 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
if err != nil {
return nil, err
}
fd := f.Fd()

fi, err := f.Stat()
if err != nil {
Expand All @@ -119,12 +79,24 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
return fr, nil
}

if size > maxMmapSize {
logger.Warn("file exceeds mmap limit, scanning first portion only",
"size", size, "limit", maxMmapSize)
size = maxMmapSize
initReadPool.Do(func() {
readPool = pool.NewBufferPool(runtime.GOMAXPROCS(0))
})
buf := readPool.Get(readBuffer) //nolint:nilaway // the buffer pool is created above

var fc bytes.Buffer
_, err = io.CopyBuffer(&fc, io.LimitReader(f, maxBytes), buf)
if err != nil {
return nil, err
}

h := sha256.New()
_, err = h.Write(fc.Bytes())
if err != nil {
return nil, err
}
checksum := fmt.Sprintf("%x", h.Sum(nil))

mime := "<unknown>"
kind, err := programkind.File(ctx, path)
if err != nil && !interactive(c) {
Expand Down Expand Up @@ -155,14 +127,14 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
}
}

initializeOnce.Do(func() {
initScannerPool.Do(func() {
// always create one scanner per available CPU core since the pool is used for the duration of
// a scan which may involve concurrent scans of individual files
scannerPool = pool.NewScannerPool(yrs, getMaxConcurrency(runtime.GOMAXPROCS(0)))
})
scanner := scannerPool.Get(yrs)

fc, mrs, checksum, err := scanFD(scanner, fd, size, logger)
mrs, err := scanner.ScanFile(path)
if err != nil {
logger.Debug("skipping", slog.Any("error", err))
return nil, err
Expand All @@ -180,16 +152,17 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
return fr, nil
}

fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc, size, checksum, kind, risk)
fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc.Bytes(), size, checksum, kind, risk)
if err != nil {
return nil, NewFileReportError(err, path, TypeGenerateError)
}

defer func() {
f.Close()
readPool.Put(buf)
scannerPool.Put(scanner)
fc = nil
mrs = nil
fc.Reset()
}()

// Clean up the path if scanning an archive
Expand Down