Skip to content

Commit e5e4777

Browse files
committed
feat: add new flag to score all paths when diffing
Signed-off-by: egibs <[email protected]>
1 parent 9319847 commit e5e4777

File tree

24 files changed

+3360
-3256
lines changed

24 files changed

+3360
-3256
lines changed

cmd/mal/mal.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ var (
6767
outputFlag string
6868
profileFlag bool
6969
quantityIncreasesRiskFlag bool
70+
scoreAllFlag bool
7071
statsFlag bool
7172
thirdPartyFlag bool
7273
verboseFlag bool
@@ -279,6 +280,11 @@ func main() {
279280
Stats: statsFlag,
280281
}
281282

283+
// always trim macOS' /private prefix
284+
if runtime.GOOS == "darwin" {
285+
mc.TrimPrefixes = append(mc.TrimPrefixes, "/private")
286+
}
287+
282288
return nil
283289
},
284290
// Global flags shared between commands
@@ -485,13 +491,21 @@ func main() {
485491
Usage: "Scan an image",
486492
Destination: &diffImageFlag,
487493
},
494+
&cli.BoolFlag{
495+
Name: "score-all",
496+
Value: false,
497+
Usage: "Compute the Levenshtein distance for all source and destination paths (warning: experimental and slow!)",
498+
Destination: &scoreAllFlag,
499+
},
488500
},
489501
Action: func(c *cli.Context) error {
490502
switch {
491503
case c.Bool("file-risk-change"):
492504
mc.FileRiskChange = true
493505
case c.Bool("file-risk-increase"):
494506
mc.FileRiskIncrease = true
507+
case c.Bool("score-all"):
508+
mc.ScoreAll = true
495509
default:
496510
}
497511
// Allow for images to be scanned with the file risk flags

pkg/action/diff.go

Lines changed: 86 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ import (
77
"context"
88
"fmt"
99
"log/slog"
10-
"math"
1110
"os"
1211
"path/filepath"
1312
"regexp"
13+
"slices"
1414
"strings"
1515

1616
"github.com/agext/levenshtein"
@@ -290,7 +290,9 @@ func Diff(ctx context.Context, c malcontent.Config, _ *clog.Logger) (*malcontent
290290
// When scanning two files, do a 1:1 comparison and
291291
// consider the source -> destination as a change rather than an add/delete
292292
shouldHandleDir := ((srcInfo.IsDir() && destInfo.IsDir()) || (srcIsArchive && destIsArchive)) || isImage
293+
archiveOrImage := (srcIsArchive && destIsArchive) || isImage
293294

295+
//nolint:nestif // ignore complexity of 12
294296
if shouldHandleDir {
295297
handleDir(ctx, c, srcResult, destResult, d, isImage)
296298
} else {
@@ -306,24 +308,31 @@ func Diff(ctx context.Context, c malcontent.Config, _ *clog.Logger) (*malcontent
306308
if srcFile != nil && destFile != nil {
307309
formatSrc := displayPath(srcResult.base, srcFile.Path)
308310
formatDest := displayPath(destResult.base, destFile.Path)
309-
if scoreFile(srcFile, destFile) {
310-
d.Removed.Set(srcFile.Path, srcFile)
311-
d.Added.Set(destFile.Path, destFile)
312-
inferMoves(ctx, c, d, srcResult, destResult, isImage)
311+
if c.ScoreAll || scoreFile(srcFile, destFile) {
312+
removed, added := srcFile.Path, destFile.Path
313+
if archiveOrImage {
314+
if rp, ap := strings.Split(srcFile.Path, "∴"), strings.Split(destFile.Path, "∴"); len(rp) == 2 && len(ap) == 2 {
315+
removed = filepath.Base(strings.TrimSpace(rp[1]))
316+
added = filepath.Base(strings.TrimSpace(ap[1]))
317+
}
318+
}
319+
d.Removed.Set(removed, srcFile)
320+
d.Added.Set(added, destFile)
321+
inferMoves(ctx, c, d, srcResult, destResult, archiveOrImage)
313322
} else {
314-
handleFile(ctx, c, srcFile, destFile, fmt.Sprintf("%s -> %s", formatSrc, formatDest), d, srcResult, destResult, isImage)
323+
handleFile(ctx, c, srcFile, destFile, fmt.Sprintf("%s -> %s", formatSrc, formatDest), d, srcResult, destResult, archiveOrImage)
315324
}
316325
}
317326
}
318327

319328
// skip inferring moves if added and removed are empty
320329
if d.Added != nil && d.Removed != nil {
321-
inferMoves(ctx, c, d, srcResult, destResult, isImage)
330+
inferMoves(ctx, c, d, srcResult, destResult, archiveOrImage)
322331
}
323332
return &malcontent.Report{Diff: d}, nil
324333
}
325334

326-
func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d *malcontent.DiffReport, isImage bool) {
335+
func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d *malcontent.DiffReport, archiveOrImage bool) {
327336
if ctx.Err() != nil {
328337
return
329338
}
@@ -343,25 +352,30 @@ func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d
343352
// Files that exist in both pass to handleFile which considers files as modifications
344353
// Otherwise, treat the source file as existing only in the source directory
345354
// These files are considered removals from the destination
355+
//nolint:nestif // ignore complexity of 10
346356
for name, srcFr := range srcFiles {
347357
if destFr, exists := destFiles[name]; exists {
348358
if !filterDiff(ctx, c, srcFr, destFr) {
349359
formatSrc := displayPath(name, srcFr.Path)
350360
formatDest := displayPath(name, destFr.Path)
351-
if scoreFile(srcFr, destFr) {
361+
if c.ScoreAll || scoreFile(srcFr, destFr) {
352362
d.Removed.Set(srcFr.Path, srcFr)
353363
d.Added.Set(destFr.Path, destFr)
354-
inferMoves(ctx, c, d, src, dest, isImage)
364+
inferMoves(ctx, c, d, src, dest, archiveOrImage)
355365
} else {
356-
handleFile(ctx, c, srcFr, destFr, fmt.Sprintf("%s -> %s", formatSrc, formatDest), d, src, dest, isImage)
366+
handleFile(ctx, c, srcFr, destFr, fmt.Sprintf("%s -> %s", formatSrc, formatDest), d, src, dest, archiveOrImage)
357367
}
358368
}
359369
} else {
360370
formatSrc := displayPath(name, srcFr.Path)
361371
dirPath := filepath.Dir(formatSrc)
362372
key := fmt.Sprintf("%s/%s", dirPath, name)
363-
if isImage {
364-
key = fmt.Sprintf("%s ∴ /%s", src.imageURI, name)
373+
if archiveOrImage {
374+
if src.imageURI != "" {
375+
key = fmt.Sprintf("%s ∴ /%s", src.imageURI, name)
376+
} else if src.tmpRoot != "" {
377+
key = fmt.Sprintf("%s ∴ /%s", src.base, name)
378+
}
365379
}
366380
d.Removed.Set(key, srcFr)
367381
}
@@ -374,8 +388,12 @@ func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d
374388
formatDest := displayPath(name, destFr.Path)
375389
dirPath := filepath.Dir(formatDest)
376390
key := fmt.Sprintf("%s/%s", dirPath, name)
377-
if isImage {
378-
key = fmt.Sprintf("%s ∴ /%s", dest.imageURI, name)
391+
if archiveOrImage {
392+
if dest.imageURI != "" {
393+
key = fmt.Sprintf("%s ∴ /%s", dest.imageURI, name)
394+
} else if dest.tmpRoot != "" {
395+
key = fmt.Sprintf("%s ∴ /%s", dest.base, name)
396+
}
379397
}
380398
d.Added.Set(key, destFr)
381399
}
@@ -433,10 +451,15 @@ func handleFile(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
433451
}
434452

435453
func createFileReport(tr, fr *malcontent.FileReport) *malcontent.FileReport {
454+
// format each path similar to scan.go
455+
path := CleanPath(tr.Path, filepath.Dir(tr.ArchiveRoot))
456+
prevPath := CleanPath(fr.Path, filepath.Dir(fr.ArchiveRoot))
457+
prevRelPath := CleanPath(fr.PreviousRelPath, filepath.Dir(fr.ArchiveRoot))
458+
436459
return &malcontent.FileReport{
437-
Path: tr.Path,
438-
PreviousPath: fr.Path,
439-
PreviousRelPath: fr.PreviousRelPath,
460+
Path: path,
461+
PreviousPath: prevPath,
462+
PreviousRelPath: prevRelPath,
440463
Behaviors: []*malcontent.Behavior{},
441464
PreviousRiskScore: fr.RiskScore,
442465
PreviousRiskLevel: fr.RiskLevel,
@@ -446,27 +469,31 @@ func createFileReport(tr, fr *malcontent.FileReport) *malcontent.FileReport {
446469
}
447470

448471
func behaviorExists(b *malcontent.Behavior, behaviors []*malcontent.Behavior) bool {
449-
for _, tb := range behaviors {
450-
if tb.ID == b.ID {
451-
return true
452-
}
453-
}
454-
return false
472+
return slices.ContainsFunc(behaviors, func(tb *malcontent.Behavior) bool {
473+
return tb.ID == b.ID
474+
})
455475
}
456476

457477
// combineReports compares every removed entry against every added entry and returns, as a slice of CombinedReport, the pairs whose paths have a Levenshtein match score of at least 0.9.
458-
func combineReports(removed, added *orderedmap.OrderedMap[string, *malcontent.FileReport]) []malcontent.CombinedReport {
478+
func combineReports(_ malcontent.Config, removed, added *orderedmap.OrderedMap[string, *malcontent.FileReport], archiveOrImage bool) []malcontent.CombinedReport {
459479
combined := make([]malcontent.CombinedReport, 0, removed.Len()*added.Len())
460480
for r := removed.Oldest(); r != nil; r = r.Next() {
461481
for a := added.Oldest(); a != nil; a = a.Next() {
462-
score := levenshtein.Match(r.Key, a.Key, levenshtein.NewParams())
482+
removed, added := r.Key, a.Key
483+
if archiveOrImage {
484+
if rp, ap := strings.Split(r.Key, "∴"), strings.Split(a.Key, "∴"); len(rp) == 2 && len(ap) == 2 {
485+
removed = filepath.Base(strings.TrimSpace(rp[1]))
486+
added = filepath.Base(strings.TrimSpace(ap[1]))
487+
}
488+
}
489+
score := levenshtein.Match(removed, added, levenshtein.NewParams())
463490
if score < 0.9 {
464491
continue
465492
}
466493
combined = append(combined, malcontent.CombinedReport{
467-
Added: a.Key,
494+
Added: added,
468495
AddedFR: a.Value,
469-
Removed: r.Key,
496+
Removed: removed,
470497
RemovedFR: r.Value,
471498
Score: score,
472499
})
@@ -475,22 +502,22 @@ func combineReports(removed, added *orderedmap.OrderedMap[string, *malcontent.Fi
475502
return combined
476503
}
477504

478-
func inferMoves(ctx context.Context, c malcontent.Config, d *malcontent.DiffReport, src, dest ScanResult, isImage bool) {
505+
func inferMoves(ctx context.Context, c malcontent.Config, d *malcontent.DiffReport, src, dest ScanResult, archiveOrImage bool) {
479506
if ctx.Err() != nil {
480507
return
481508
}
482509

483-
for _, cr := range combineReports(d.Removed, d.Added) {
484-
fileMove(ctx, c, cr.RemovedFR, cr.AddedFR, cr.Removed, cr.Added, d, cr.Score, src, dest, isImage)
510+
for _, cr := range combineReports(c, d.Removed, d.Added, archiveOrImage) {
511+
fileMove(ctx, c, cr.RemovedFR, cr.AddedFR, cr.Removed, cr.Added, d, cr.Score, src, dest, archiveOrImage)
485512
}
486513
}
487514

488-
func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, rpath, apath string, d *malcontent.DiffReport, score float64, _, dest ScanResult, isImage bool) {
515+
func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, rpath, apath string, d *malcontent.DiffReport, score float64, src ScanResult, dest ScanResult, archiveOrImage bool) {
489516
if ctx.Err() != nil {
490517
return
491518
}
492519

493-
minRisk := int(math.Min(float64(c.MinRisk), float64(c.MinFileRisk)))
520+
minRisk := min(c.MinRisk, c.MinFileRisk)
494521
if fr.RiskScore < minRisk && tr.RiskScore < minRisk {
495522
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
496523
return
@@ -502,11 +529,16 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
502529
return
503530
}
504531

532+
// handle the same path cleanup as above
533+
path := CleanPath(tr.Path, filepath.Dir(tr.ArchiveRoot))
534+
prevPath := CleanPath(fr.Path, filepath.Dir(fr.ArchiveRoot))
535+
prevRelPath := CleanPath(fr.Path, filepath.Dir(fr.ArchiveRoot))
536+
505537
// We think that this file moved from rpath to apath.
506538
abs := &malcontent.FileReport{
507-
Path: tr.Path,
508-
PreviousPath: fr.Path,
509-
PreviousRelPath: rpath,
539+
Path: path,
540+
PreviousPath: prevPath,
541+
PreviousRelPath: prevRelPath,
510542
PreviousRelPathScore: score,
511543

512544
Behaviors: []*malcontent.Behavior{},
@@ -522,6 +554,8 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
522554
if !behaviorExists(tb, fr.Behaviors) {
523555
tb.DiffAdded = true
524556
abs.Behaviors = append(abs.Behaviors, tb)
557+
} else {
558+
abs.Behaviors = append(abs.Behaviors, tb)
525559
}
526560
}
527561

@@ -530,16 +564,22 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
530564
if !behaviorExists(fb, tr.Behaviors) {
531565
fb.DiffRemoved = true
532566
abs.Behaviors = append(abs.Behaviors, fb)
533-
}
534-
if behaviorExists(fb, tr.Behaviors) {
567+
} else {
535568
abs.Behaviors = append(abs.Behaviors, fb)
536569
}
537570
}
538571

539-
if isImage {
540-
abs.Path = strings.TrimPrefix(abs.Path, "/private")
541-
abs.Path = fmt.Sprintf("%s ∴ %s", dest.imageURI, strings.TrimPrefix(abs.Path, dest.tmpRoot))
572+
if archiveOrImage {
573+
abs.Path = CleanPath(abs.Path, "/private")
574+
abs.PreviousPath = CleanPath(abs.PreviousPath, "/private")
575+
if dest.imageURI != "" {
576+
abs.Path = fmt.Sprintf("%s ∴ %s", dest.imageURI, strings.TrimPrefix(abs.Path, dest.tmpRoot))
577+
}
578+
if src.imageURI != "" {
579+
abs.PreviousPath = fmt.Sprintf("%s ∴ %s", src.imageURI, strings.TrimPrefix(abs.PreviousPath, src.tmpRoot))
580+
}
542581
}
582+
543583
d.Modified.Set(apath, abs)
544584
d.Removed.Delete(rpath)
545585
d.Added.Delete(apath)
@@ -554,14 +594,14 @@ func filterDiff(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
554594
return false
555595
}
556596

557-
if c.FileRiskChange && fr.RiskScore == tr.RiskScore {
597+
switch {
598+
case c.FileRiskChange && fr.RiskScore == tr.RiskScore:
558599
clog.FromContext(ctx).Info("dropping result because diff scores were the same", slog.Any("paths", fmt.Sprintf("%s (%d) %s (%d)", fr.Path, fr.RiskScore, tr.Path, tr.RiskScore)))
559600
return true
560-
}
561-
if c.FileRiskIncrease && fr.RiskScore >= tr.RiskScore {
601+
case c.FileRiskIncrease && fr.RiskScore >= tr.RiskScore:
562602
clog.FromContext(ctx).Info("dropping result because old score was the same or higher than the new score", slog.Any("paths ", fmt.Sprintf("%s (%d) %s (%d)", fr.Path, fr.RiskScore, tr.Path, tr.RiskScore)))
563603
return true
604+
default:
605+
return false
564606
}
565-
566-
return false
567607
}

pkg/action/path.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,12 @@ func findFilesRecursively(ctx context.Context, rootPath string) ([]string, error
5858
return files, err
5959
}
6060

61-
// cleanPath removes the temporary directory prefix from the path.
62-
func cleanPath(path string, prefix string) string {
63-
return strings.TrimPrefix(path, prefix)
61+
// CleanPath removes the temporary directory prefix from the path.
62+
func CleanPath(path string, prefix string) string {
63+
return formatPath(strings.TrimPrefix(path, prefix))
6464
}
6565

6666
// formatPath normalizes a path for display by converting every backslash
// to a forward slash. Paths without backslashes are returned unchanged.
func formatPath(path string) string {
	if strings.IndexByte(path, '\\') < 0 {
		return path
	}

	// '\\' is a single ASCII byte and never occurs inside a multi-byte UTF-8
	// sequence, so a byte-wise swap matches strings.ReplaceAll exactly.
	normalized := []byte(path)
	for i := range normalized {
		if normalized[i] == '\\' {
			normalized[i] = '/'
		}
	}
	return string(normalized)
}

pkg/action/scan.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,15 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
174174
if err != nil {
175175
return nil, NewFileReportError(err, path, TypeGenerateError)
176176
}
177-
if runtime.GOOS == "darwin" {
178-
pathAbs = strings.TrimPrefix(pathAbs, "/private")
179-
archiveRootAbs = strings.TrimPrefix(archiveRootAbs, "/private")
180-
}
177+
178+
// handle macOS prefixing temporary directories with /private
179+
absPath = CleanPath(absPath, "/private")
180+
pathAbs = CleanPath(pathAbs, "/private")
181+
archiveRootAbs = CleanPath(archiveRootAbs, "/private")
182+
181183
fr.ArchiveRoot = archiveRootAbs
182184
fr.FullPath = pathAbs
183-
clean = formatPath(cleanPath(pathAbs, archiveRootAbs))
185+
clean = CleanPath(pathAbs, archiveRootAbs)
184186

185187
if absPath != "" && absPath != path && (isArchive || c.OCI) {
186188
if len(c.TrimPrefixes) > 0 {

pkg/action/scan_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func TestCleanPath(t *testing.T) {
8686
fullPath := filepath.Join(tempDir, tt.path)
8787
fullPrefix := filepath.Join(tempDir, tt.prefix)
8888

89-
got := cleanPath(fullPath, fullPrefix)
89+
got := CleanPath(fullPath, fullPrefix)
9090
if !strings.HasSuffix(got, tt.want) {
9191
t.Errorf("cleanPath() = %v, want suffix %v", got, tt.want)
9292
}

0 commit comments

Comments
 (0)