Skip to content

Commit 3d818a5

Browse files
authored
fix: improve diff determinism for UPX files and file moves in general (#1265)
* fix: improve diff determinism for UPX files and file moves in general Signed-off-by: egibs <[email protected]> * use the decompressed files in handleDir as well Signed-off-by: egibs <[email protected]> --------- Signed-off-by: egibs <[email protected]>
1 parent 9abcc2d commit 3d818a5

File tree

4 files changed

+51
-15
lines changed

4 files changed

+51
-15
lines changed

pkg/action/diff.go

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ import (
77
"context"
88
"fmt"
99
"log/slog"
10+
"maps"
1011
"os"
1112
"path/filepath"
1213
"regexp"
1314
"slices"
15+
"sort"
1416
"strings"
1517

1618
"github.com/agext/levenshtein"
@@ -113,6 +115,37 @@ func relPath(from string, fr *malcontent.FileReport, isArchive bool, isImage boo
113115
return rel, base, nil
114116
}
115117

118+
// selectPrimaryFile selects a single file from a map of file reports in a deterministic way.
119+
// e.g., when a UPX-packed file is scanned, it produces the decompressed file
120+
// and preserves the original file (with a .~ suffix).
121+
func selectPrimaryFile(files map[string]*malcontent.FileReport) *malcontent.FileReport {
122+
if len(files) == 0 {
123+
return nil
124+
}
125+
126+
keys := slices.Sorted(maps.Keys(files))
127+
128+
if i := slices.IndexFunc(keys, func(k string) bool {
129+
return !strings.HasSuffix(k, ".~")
130+
}); i >= 0 {
131+
return files[keys[i]]
132+
}
133+
134+
return files[keys[0]]
135+
}
136+
137+
// isUPXBackup returns true if the path is a UPX backup file (.~ suffix)
138+
// and the corresponding decompressed file exists in the files map.
139+
func isUPXBackup(path string, files map[string]*malcontent.FileReport) bool {
140+
if !strings.HasSuffix(path, ".~") {
141+
return false
142+
}
143+
144+
decompressed := strings.TrimSuffix(path, ".~")
145+
_, exists := files[decompressed]
146+
return exists
147+
}
148+
116149
func relFileReport(ctx context.Context, c malcontent.Config, fromPath string, isImage bool) (map[string]*malcontent.FileReport, string, error) {
117150
if ctx.Err() != nil {
118151
return nil, "", ctx.Err()
@@ -283,15 +316,8 @@ func Diff(ctx context.Context, c malcontent.Config, _ *clog.Logger) (*malcontent
283316
if shouldHandleDir {
284317
handleDir(ctx, c, srcResult, destResult, d, archiveOrImage)
285318
} else {
286-
var srcFile, destFile *malcontent.FileReport
287-
for _, fr := range srcResult.files {
288-
srcFile = fr
289-
break
290-
}
291-
for _, fr := range destResult.files {
292-
destFile = fr
293-
break
294-
}
319+
srcFile := selectPrimaryFile(srcResult.files)
320+
destFile := selectPrimaryFile(destResult.files)
295321
if srcFile != nil && destFile != nil {
296322
removed := formatKey(srcResult, CleanPath(srcFile.Path, srcResult.tmpRoot))
297323
added := formatKey(srcResult, CleanPath(destFile.Path, destResult.tmpRoot))
@@ -325,12 +351,12 @@ func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d
325351
srcFiles, destFiles := make(map[string]*malcontent.FileReport), make(map[string]*malcontent.FileReport)
326352

327353
for rel, fr := range src.files {
328-
if rel != "" {
354+
if rel != "" && !isUPXBackup(rel, src.files) {
329355
srcFiles[rel] = fr
330356
}
331357
}
332358
for rel, fr := range dest.files {
333-
if rel != "" {
359+
if rel != "" && !isUPXBackup(rel, dest.files) {
334360
destFiles[rel] = fr
335361
}
336362
}
@@ -419,6 +445,11 @@ func handleFile(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
419445
rbs.Behaviors = append(rbs.Behaviors, tb)
420446
}
421447

448+
// Sort behaviors by ID for deterministic output
449+
sort.Slice(rbs.Behaviors, func(i, j int) bool {
450+
return rbs.Behaviors[i].ID < rbs.Behaviors[j].ID
451+
})
452+
422453
if archiveOrImage {
423454
rbs.Path = CleanPath(rbs.Path, "/private")
424455
rbs.Path = formatKey(dest, CleanPath(rbs.Path, dest.tmpRoot))
@@ -598,6 +629,11 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
598629
}
599630
}
600631

632+
// Sort behaviors by ID for deterministic output
633+
sort.Slice(abs.Behaviors, func(i, j int) bool {
634+
return abs.Behaviors[i].ID < abs.Behaviors[j].ID
635+
})
636+
601637
if archiveOrImage {
602638
abs.Path = CleanPath(abs.Path, "/private")
603639
abs.PreviousPath = CleanPath(abs.PreviousPath, "/private")

tests/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
*** changed (22 added, 1 removed): linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst
2-
-c2/tool_transfer/os
32
+anti-static/base64/exec
43
+anti-static/base64/http_agent
54
+anti-static/elf/base64
5+
-c2/tool_transfer/os
66
+data/base64/external
77
+data/embedded/base64_elf
88
+data/embedded/base64_terms
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
*** changed (3 added, 1 removed): linux/2024.sbcl.market/sbcl.dirty
2-
-crypto/rc4
32
+anti-static/elf/entropy
3+
-crypto/rc4
44
+data/embedded/zstd
55
+net/dns/txt

tests/macOS/clean/ls.sdiff.trigger_2

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
-c2/tool_transfer/os
44
-data/compression/lzma
55
-discover/system/hostname
6-
-net/url/embedded
7-
-process/name_set
86
+fs/directory/traverse
97
+net/http
8+
-net/url/embedded
9+
-process/name_set

0 commit comments

Comments
 (0)