Skip to content

Commit 5f44f86

Browse files
authored
Update yara-x to 1.9.0; use new ScanFile functionality (#1198)
Signed-off-by: egibs <[email protected]>
1 parent ec2a923 commit 5f44f86

File tree

5 files changed

+41
-68
lines changed

5 files changed

+41
-68
lines changed

.github/workflows/codeql.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
fetch-tags: true
3737
repository: virusTotal/yara-x
3838
path: yara-x
39-
ref: refs/tags/v1.8.0
39+
ref: refs/tags/v1.9.0
4040
- name: Install Rust for yara-x-capi
4141
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9
4242
with:

Makefile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
SAMPLES_REPO ?= chainguard-dev/malcontent-samples
66
SAMPLES_COMMIT ?= f948cfd0f9d2a35a2452fe43ea4d094979652103
7-
YARA_X_REPO ?= virusTotal/yara-x
8-
YARA_X_COMMIT ?= dcde4cba78aa122d0a1bbf293d85b0155bcba9b7
97

108
# BEGIN: lint-install ../malcontent
119
# http://github.com/tinkerbell/lint-install
@@ -52,17 +50,19 @@ $(GOLANGCI_LINT_BIN):
5250
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(LINT_ROOT)/out/linters $(GOLANGCI_LINT_VERSION)
5351
mv $(LINT_ROOT)/out/linters/golangci-lint $@
5452

55-
YARA_X_VERSION ?= v1.8.0
53+
YARA_X_REPO ?= virusTotal/yara-x
54+
YARA_X_VERSION ?= v1.9.0
55+
YARA_X_COMMIT ?= 01ef5e2bc57c112448682ce6a5e6b66c4da6d6c9
5656
YARA_X_SHA :=
5757
ifeq ($(LINT_OS),Darwin)
5858
ifeq ($(shell uname -m),arm64)
5959
LINT_ARCH = aarch64
60-
YARA_X_SHA = d8e485f6ac302a8e9ff5aba43822acebc4e5df92bc22b2fc215e740258b6adf6
60+
YARA_X_SHA = 7b9b4d7efd762fd3d64f08fd54b16762cda49ed7c1614a329ba03def420f07c6
6161
else
62-
YARA_X_SHA = ac8b16726d5ed484e5ba8d383ba6fde8b3cff7f05378844c461e23164991910c
62+
YARA_X_SHA = c47e38788a17296f7f558921657730fea78c0d5458c9379530ba9bea1d129341
6363
endif
6464
else
65-
YARA_X_SHA = a80fe18143c8da080f632ecffc3fe88d5c3c6622217423f61ee4e3add01d17ec
65+
YARA_X_SHA = 8a3a706ede8abaffbd899bfb7f009ab650502a834ead790d3596c1e0f56a0180
6666
endif
6767
YARA_X_BIN := $(LINT_ROOT)/out/linters/yr-$(YARA_X_VERSION)-$(LINT_ARCH)
6868
$(YARA_X_BIN):

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.24.0
55
toolchain go1.24.1
66

77
require (
8-
github.com/VirusTotal/yara-x/go v1.8.0
8+
github.com/VirusTotal/yara-x/go v1.9.0
99
github.com/agext/levenshtein v1.2.3
1010
github.com/cavaliergopher/cpio v1.0.1
1111
github.com/cavaliergopher/rpm v1.3.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
github.com/VirusTotal/yara-x/go v1.8.0 h1:fpSYlJWBryXO51TT1plRvwiOr6rvCtfy9XSy/y96rjQ=
2-
github.com/VirusTotal/yara-x/go v1.8.0/go.mod h1:lgXP/nkYX349MVowrtTtU5hzMdCOWQLv3+wKll9+0F8=
1+
github.com/VirusTotal/yara-x/go v1.9.0 h1:kuB9kH4sfc0+AwR54Kk6gI5XInbk1O+QeZU9DgOrTAU=
2+
github.com/VirusTotal/yara-x/go v1.9.0/go.mod h1:lgXP/nkYX349MVowrtTtU5hzMdCOWQLv3+wKll9+0F8=
33
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
44
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
55
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=

pkg/action/scan.go

Lines changed: 31 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"errors"
1010
"fmt"
11+
"io"
1112
"io/fs"
1213
"log/slog"
1314
"os"
@@ -17,9 +18,6 @@ import (
1718
"strings"
1819
"sync"
1920
"sync/atomic"
20-
"syscall"
21-
22-
"github.com/minio/sha256-simd"
2321

2422
"github.com/chainguard-dev/clog"
2523
"github.com/chainguard-dev/malcontent/pkg/archive"
@@ -29,6 +27,7 @@ import (
2927
"github.com/chainguard-dev/malcontent/pkg/programkind"
3028
"github.com/chainguard-dev/malcontent/pkg/render"
3129
"github.com/chainguard-dev/malcontent/pkg/report"
30+
"github.com/minio/sha256-simd"
3231
"golang.org/x/sync/errgroup"
3332

3433
yarax "github.com/VirusTotal/yara-x/go"
@@ -39,55 +38,17 @@ func interactive(c malcontent.Config) bool {
3938
}
4039

4140
var (
42-
// compiledRuleCache are a cache of previously compiled rules.
43-
compiledRuleCache atomic.Pointer[yarax.Rules]
44-
// compileOnce ensures that we compile rules only once even across threads.
45-
compileOnce sync.Once
41+
compiledRuleCache atomic.Pointer[yarax.Rules] // compiledRuleCache is a cache of previously compiled rules.
42+
compileOnce sync.Once // compileOnce ensures that we compile rules only once even across threads.
4643
ErrMatchedCondition = errors.New("matched exit criteria")
47-
// initializeOnce ensures that the file and scanner pools are only initialized once.
48-
initializeOnce sync.Once
49-
scannerPool *pool.ScannerPool
50-
maxMmapSize int64 = 1 << 31
44+
initReadPool sync.Once // initReadPool ensures that the read buffer pool is only initialized once.
45+
initScannerPool sync.Once // initScannerPool ensures that the scanner pool is only initialized once.
46+
maxBytes int64 = 1 << 32 // 4GB
47+
readBuffer int64 = 64 * 1024 // 64KB
48+
readPool *pool.BufferPool
49+
scannerPool *pool.ScannerPool
5150
)
5251

53-
// scanFD scans a file descriptor using memory mapping for efficient large file handling.
54-
// This avoids loading the entire file into memory while still using yara-x's byte slice scanning.
55-
// scanFD also returns the file's contents for match string extraction,
56-
// as well as the file's size and its checksum which were originally calculated separately as part of report generation.
57-
func scanFD(scanner *yarax.Scanner, fd uintptr, size int64, logger *clog.Logger) ([]byte, *yarax.ScanResults, string, error) {
58-
stat := &syscall.Stat_t{}
59-
if err := syscall.Fstat(int(fd), stat); err != nil {
60-
return nil, nil, "", fmt.Errorf("fstat failed: %w", err)
61-
}
62-
63-
data, err := syscall.Mmap(int(fd), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
64-
if err != nil {
65-
return nil, nil, "", fmt.Errorf("mmap failed: %w", err)
66-
}
67-
68-
defer func() {
69-
if unmapErr := syscall.Munmap(data); unmapErr != nil {
70-
logger.Error("failed to unmap memory", "error", unmapErr)
71-
}
72-
}()
73-
74-
h := sha256.New()
75-
h.Write(data)
76-
checksum := fmt.Sprintf("%x", h.Sum(nil))
77-
78-
// Create a copy of the data to return since the mmap will be unmapped
79-
// This is necessary because report generation needs access to file content
80-
// for match string extraction
81-
fc := bytes.Clone(data)
82-
83-
mrs, err := scanner.Scan(data)
84-
if err != nil {
85-
return nil, nil, "", err
86-
}
87-
88-
return fc, mrs, checksum, err
89-
}
90-
9152
// scanSinglePath YARA scans a single path and converts it to a fileReport.
9253
func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleFS []fs.FS, absPath string, archiveRoot string) (*malcontent.FileReport, error) {
9354
if ctx.Err() != nil {
@@ -103,7 +64,6 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
10364
if err != nil {
10465
return nil, err
10566
}
106-
fd := f.Fd()
10767

10868
fi, err := f.Stat()
10969
if err != nil {
@@ -119,12 +79,24 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
11979
return fr, nil
12080
}
12181

122-
if size > maxMmapSize {
123-
logger.Warn("file exceeds mmap limit, scanning first portion only",
124-
"size", size, "limit", maxMmapSize)
125-
size = maxMmapSize
82+
initReadPool.Do(func() {
83+
readPool = pool.NewBufferPool(runtime.GOMAXPROCS(0))
84+
})
85+
buf := readPool.Get(readBuffer) //nolint:nilaway // the buffer pool is created above
86+
87+
var fc bytes.Buffer
88+
_, err = io.CopyBuffer(&fc, io.LimitReader(f, maxBytes), buf)
89+
if err != nil {
90+
return nil, err
12691
}
12792

93+
h := sha256.New()
94+
_, err = h.Write(fc.Bytes())
95+
if err != nil {
96+
return nil, err
97+
}
98+
checksum := fmt.Sprintf("%x", h.Sum(nil))
99+
128100
mime := "<unknown>"
129101
kind, err := programkind.File(ctx, path)
130102
if err != nil && !interactive(c) {
@@ -155,14 +127,14 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
155127
}
156128
}
157129

158-
initializeOnce.Do(func() {
130+
initScannerPool.Do(func() {
159131
// always create one scanner per available CPU core since the pool is used for the duration of
160132
// a scan which may involve concurrent scans of individual files
161133
scannerPool = pool.NewScannerPool(yrs, getMaxConcurrency(runtime.GOMAXPROCS(0)))
162134
})
163135
scanner := scannerPool.Get(yrs)
164136

165-
fc, mrs, checksum, err := scanFD(scanner, fd, size, logger)
137+
mrs, err := scanner.ScanFile(path)
166138
if err != nil {
167139
logger.Debug("skipping", slog.Any("error", err))
168140
return nil, err
@@ -180,16 +152,17 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
180152
return fr, nil
181153
}
182154

183-
fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc, size, checksum, kind, risk)
155+
fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc.Bytes(), size, checksum, kind, risk)
184156
if err != nil {
185157
return nil, NewFileReportError(err, path, TypeGenerateError)
186158
}
187159

188160
defer func() {
189161
f.Close()
162+
readPool.Put(buf)
190163
scannerPool.Put(scanner)
191-
fc = nil
192164
mrs = nil
165+
fc.Reset()
193166
}()
194167

195168
// Clean up the path if scanning an archive

0 commit comments

Comments
 (0)