@@ -7,10 +7,10 @@ import (
77 "context"
88 "fmt"
99 "log/slog"
10- "math"
1110 "os"
1211 "path/filepath"
1312 "regexp"
13+ "slices"
1414 "strings"
1515
1616 "github.com/agext/levenshtein"
@@ -290,7 +290,9 @@ func Diff(ctx context.Context, c malcontent.Config, _ *clog.Logger) (*malcontent
290290 // When scanning two files, do a 1:1 comparison and
291291 // consider the source -> destination as a change rather than an add/delete
292292 shouldHandleDir := ((srcInfo .IsDir () && destInfo .IsDir ()) || (srcIsArchive && destIsArchive )) || isImage
293+ archiveOrImage := (srcIsArchive && destIsArchive ) || isImage
293294
295+ //nolint:nestif // ignore complexity of 12
294296 if shouldHandleDir {
295297 handleDir (ctx , c , srcResult , destResult , d , isImage )
296298 } else {
@@ -306,24 +308,31 @@ func Diff(ctx context.Context, c malcontent.Config, _ *clog.Logger) (*malcontent
306308 if srcFile != nil && destFile != nil {
307309 formatSrc := displayPath (srcResult .base , srcFile .Path )
308310 formatDest := displayPath (destResult .base , destFile .Path )
309- if scoreFile (srcFile , destFile ) {
310- d .Removed .Set (srcFile .Path , srcFile )
311- d .Added .Set (destFile .Path , destFile )
312- inferMoves (ctx , c , d , srcResult , destResult , isImage )
311+ if c .ScoreAll || scoreFile (srcFile , destFile ) {
312+ removed , added := srcFile .Path , destFile .Path
313+ if archiveOrImage {
314+ if rp , ap := strings .Split (srcFile .Path , "∴" ), strings .Split (destFile .Path , "∴" ); len (rp ) == 2 && len (ap ) == 2 {
315+ removed = filepath .Base (strings .TrimSpace (rp [1 ]))
316+ added = filepath .Base (strings .TrimSpace (ap [1 ]))
317+ }
318+ }
319+ d .Removed .Set (removed , srcFile )
320+ d .Added .Set (added , destFile )
321+ inferMoves (ctx , c , d , srcResult , destResult , archiveOrImage )
313322 } else {
314- handleFile (ctx , c , srcFile , destFile , fmt .Sprintf ("%s -> %s" , formatSrc , formatDest ), d , srcResult , destResult , isImage )
323+ handleFile (ctx , c , srcFile , destFile , fmt .Sprintf ("%s -> %s" , formatSrc , formatDest ), d , srcResult , destResult , archiveOrImage )
315324 }
316325 }
317326 }
318327
319328 // skip inferring moves if added and removed are empty
320329 if d .Added != nil && d .Removed != nil {
321- inferMoves (ctx , c , d , srcResult , destResult , isImage )
330+ inferMoves (ctx , c , d , srcResult , destResult , archiveOrImage )
322331 }
323332 return & malcontent.Report {Diff : d }, nil
324333}
325334
326- func handleDir (ctx context.Context , c malcontent.Config , src , dest ScanResult , d * malcontent.DiffReport , isImage bool ) {
335+ func handleDir (ctx context.Context , c malcontent.Config , src , dest ScanResult , d * malcontent.DiffReport , archiveOrImage bool ) {
327336 if ctx .Err () != nil {
328337 return
329338 }
@@ -343,25 +352,30 @@ func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d
343352 // Files that exist in both pass to handleFile which considers files as modifications
344353 // Otherwise, treat the source file as existing only in the source directory
345354 // These files are considered removals from the destination
355+ //nolint:nestif // ignore complexity of 10
346356 for name , srcFr := range srcFiles {
347357 if destFr , exists := destFiles [name ]; exists {
348358 if ! filterDiff (ctx , c , srcFr , destFr ) {
349359 formatSrc := displayPath (name , srcFr .Path )
350360 formatDest := displayPath (name , destFr .Path )
351- if scoreFile (srcFr , destFr ) {
361+ if c . ScoreAll || scoreFile (srcFr , destFr ) {
352362 d .Removed .Set (srcFr .Path , srcFr )
353363 d .Added .Set (destFr .Path , destFr )
354- inferMoves (ctx , c , d , src , dest , isImage )
364+ inferMoves (ctx , c , d , src , dest , archiveOrImage )
355365 } else {
356- handleFile (ctx , c , srcFr , destFr , fmt .Sprintf ("%s -> %s" , formatSrc , formatDest ), d , src , dest , isImage )
366+ handleFile (ctx , c , srcFr , destFr , fmt .Sprintf ("%s -> %s" , formatSrc , formatDest ), d , src , dest , archiveOrImage )
357367 }
358368 }
359369 } else {
360370 formatSrc := displayPath (name , srcFr .Path )
361371 dirPath := filepath .Dir (formatSrc )
362372 key := fmt .Sprintf ("%s/%s" , dirPath , name )
363- if isImage {
364- key = fmt .Sprintf ("%s ∴ /%s" , src .imageURI , name )
373+ if archiveOrImage {
374+ if src .imageURI != "" {
375+ key = fmt .Sprintf ("%s ∴ /%s" , src .imageURI , name )
376+ } else if src .tmpRoot != "" {
377+ key = fmt .Sprintf ("%s ∴ /%s" , src .base , name )
378+ }
365379 }
366380 d .Removed .Set (key , srcFr )
367381 }
@@ -374,8 +388,12 @@ func handleDir(ctx context.Context, c malcontent.Config, src, dest ScanResult, d
374388 formatDest := displayPath (name , destFr .Path )
375389 dirPath := filepath .Dir (formatDest )
376390 key := fmt .Sprintf ("%s/%s" , dirPath , name )
377- if isImage {
378- key = fmt .Sprintf ("%s ∴ /%s" , dest .imageURI , name )
391+ if archiveOrImage {
392+ if dest .imageURI != "" {
393+ key = fmt .Sprintf ("%s ∴ /%s" , dest .imageURI , name )
394+ } else if dest .tmpRoot != "" {
395+ key = fmt .Sprintf ("%s ∴ /%s" , dest .base , name )
396+ }
379397 }
380398 d .Added .Set (key , destFr )
381399 }
@@ -433,10 +451,15 @@ func handleFile(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
433451}
434452
435453func createFileReport (tr , fr * malcontent.FileReport ) * malcontent.FileReport {
454+ // format each path similar to scan.go
455+ path := CleanPath (tr .Path , filepath .Dir (tr .ArchiveRoot ))
456+ prevPath := CleanPath (fr .Path , filepath .Dir (fr .ArchiveRoot ))
457+ prevRelPath := CleanPath (fr .PreviousRelPath , filepath .Dir (fr .ArchiveRoot ))
458+
436459 return & malcontent.FileReport {
437- Path : tr . Path ,
438- PreviousPath : fr . Path ,
439- PreviousRelPath : fr . PreviousRelPath ,
460+ Path : path ,
461+ PreviousPath : prevPath ,
462+ PreviousRelPath : prevRelPath ,
440463 Behaviors : []* malcontent.Behavior {},
441464 PreviousRiskScore : fr .RiskScore ,
442465 PreviousRiskLevel : fr .RiskLevel ,
@@ -446,27 +469,31 @@ func createFileReport(tr, fr *malcontent.FileReport) *malcontent.FileReport {
446469}
447470
448471func behaviorExists (b * malcontent.Behavior , behaviors []* malcontent.Behavior ) bool {
449- for _ , tb := range behaviors {
450- if tb .ID == b .ID {
451- return true
452- }
453- }
454- return false
472+ return slices .ContainsFunc (behaviors , func (tb * malcontent.Behavior ) bool {
473+ return tb .ID == b .ID
474+ })
455475}
456476
457477// combineReports compares every removed path against every added path and returns the pairs whose Levenshtein match score is at least 0.9 as candidate moves.
458- func combineReports (removed , added * orderedmap.OrderedMap [string , * malcontent.FileReport ]) []malcontent.CombinedReport {
478+ func combineReports (_ malcontent. Config , removed , added * orderedmap.OrderedMap [string , * malcontent.FileReport ], archiveOrImage bool ) []malcontent.CombinedReport {
459479 combined := make ([]malcontent.CombinedReport , 0 , removed .Len ()* added .Len ())
460480 for r := removed .Oldest (); r != nil ; r = r .Next () {
461481 for a := added .Oldest (); a != nil ; a = a .Next () {
462- score := levenshtein .Match (r .Key , a .Key , levenshtein .NewParams ())
482+ removed , added := r .Key , a .Key
483+ if archiveOrImage {
484+ if rp , ap := strings .Split (r .Key , "∴" ), strings .Split (a .Key , "∴" ); len (rp ) == 2 && len (ap ) == 2 {
485+ removed = filepath .Base (strings .TrimSpace (rp [1 ]))
486+ added = filepath .Base (strings .TrimSpace (ap [1 ]))
487+ }
488+ }
489+ score := levenshtein .Match (removed , added , levenshtein .NewParams ())
463490 if score < 0.9 {
464491 continue
465492 }
466493 combined = append (combined , malcontent.CombinedReport {
467- Added : a . Key ,
494+ Added : added ,
468495 AddedFR : a .Value ,
469- Removed : r . Key ,
496+ Removed : removed ,
470497 RemovedFR : r .Value ,
471498 Score : score ,
472499 })
@@ -475,22 +502,22 @@ func combineReports(removed, added *orderedmap.OrderedMap[string, *malcontent.Fi
475502 return combined
476503}
477504
478- func inferMoves (ctx context.Context , c malcontent.Config , d * malcontent.DiffReport , src , dest ScanResult , isImage bool ) {
505+ func inferMoves (ctx context.Context , c malcontent.Config , d * malcontent.DiffReport , src , dest ScanResult , archiveOrImage bool ) {
479506 if ctx .Err () != nil {
480507 return
481508 }
482509
483- for _ , cr := range combineReports (d .Removed , d .Added ) {
484- fileMove (ctx , c , cr .RemovedFR , cr .AddedFR , cr .Removed , cr .Added , d , cr .Score , src , dest , isImage )
510+ for _ , cr := range combineReports (c , d .Removed , d .Added , archiveOrImage ) {
511+ fileMove (ctx , c , cr .RemovedFR , cr .AddedFR , cr .Removed , cr .Added , d , cr .Score , src , dest , archiveOrImage )
485512 }
486513}
487514
488- func fileMove (ctx context.Context , c malcontent.Config , fr , tr * malcontent.FileReport , rpath , apath string , d * malcontent.DiffReport , score float64 , _ , dest ScanResult , isImage bool ) {
515+ func fileMove (ctx context.Context , c malcontent.Config , fr , tr * malcontent.FileReport , rpath , apath string , d * malcontent.DiffReport , score float64 , src ScanResult , dest ScanResult , archiveOrImage bool ) {
489516 if ctx .Err () != nil {
490517 return
491518 }
492519
493- minRisk := int ( math . Min ( float64 ( c .MinRisk ), float64 ( c .MinFileRisk )) )
520+ minRisk := min ( c .MinRisk , c .MinFileRisk )
494521 if fr .RiskScore < minRisk && tr .RiskScore < minRisk {
495522 clog .FromContext (ctx ).Info ("diff does not meet min trigger level" , slog .Any ("path" , tr .Path ))
496523 return
@@ -502,11 +529,16 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
502529 return
503530 }
504531
532+ // handle the same path cleanup as above
533+ path := CleanPath (tr .Path , filepath .Dir (tr .ArchiveRoot ))
534+ prevPath := CleanPath (fr .Path , filepath .Dir (fr .ArchiveRoot ))
535+ prevRelPath := CleanPath (fr .Path , filepath .Dir (fr .ArchiveRoot ))
536+
505537 // We think that this file moved from rpath to apath.
506538 abs := & malcontent.FileReport {
507- Path : tr . Path ,
508- PreviousPath : fr . Path ,
509- PreviousRelPath : rpath ,
539+ Path : path ,
540+ PreviousPath : prevPath ,
541+ PreviousRelPath : prevRelPath ,
510542 PreviousRelPathScore : score ,
511543
512544 Behaviors : []* malcontent.Behavior {},
@@ -522,6 +554,8 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
522554 if ! behaviorExists (tb , fr .Behaviors ) {
523555 tb .DiffAdded = true
524556 abs .Behaviors = append (abs .Behaviors , tb )
557+ } else {
558+ abs .Behaviors = append (abs .Behaviors , tb )
525559 }
526560 }
527561
@@ -530,16 +564,22 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
530564 if ! behaviorExists (fb , tr .Behaviors ) {
531565 fb .DiffRemoved = true
532566 abs .Behaviors = append (abs .Behaviors , fb )
533- }
534- if behaviorExists (fb , tr .Behaviors ) {
567+ } else {
535568 abs .Behaviors = append (abs .Behaviors , fb )
536569 }
537570 }
538571
539- if isImage {
540- abs .Path = strings .TrimPrefix (abs .Path , "/private" )
541- abs .Path = fmt .Sprintf ("%s ∴ %s" , dest .imageURI , strings .TrimPrefix (abs .Path , dest .tmpRoot ))
572+ if archiveOrImage {
573+ abs .Path = CleanPath (abs .Path , "/private" )
574+ abs .PreviousPath = CleanPath (abs .PreviousPath , "/private" )
575+ if dest .imageURI != "" {
576+ abs .Path = fmt .Sprintf ("%s ∴ %s" , dest .imageURI , strings .TrimPrefix (abs .Path , dest .tmpRoot ))
577+ }
578+ if src .imageURI != "" {
579+ abs .PreviousPath = fmt .Sprintf ("%s ∴ %s" , src .imageURI , strings .TrimPrefix (abs .PreviousPath , src .tmpRoot ))
580+ }
542581 }
582+
543583 d .Modified .Set (apath , abs )
544584 d .Removed .Delete (rpath )
545585 d .Added .Delete (apath )
@@ -554,14 +594,14 @@ func filterDiff(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
554594 return false
555595 }
556596
557- if c .FileRiskChange && fr .RiskScore == tr .RiskScore {
597+ switch {
598+ case c .FileRiskChange && fr .RiskScore == tr .RiskScore :
558599 clog .FromContext (ctx ).Info ("dropping result because diff scores were the same" , slog .Any ("paths" , fmt .Sprintf ("%s (%d) %s (%d)" , fr .Path , fr .RiskScore , tr .Path , tr .RiskScore )))
559600 return true
560- }
561- if c .FileRiskIncrease && fr .RiskScore >= tr .RiskScore {
601+ case c .FileRiskIncrease && fr .RiskScore >= tr .RiskScore :
562602 clog .FromContext (ctx ).Info ("dropping result because old score was the same or higher than the new score" , slog .Any ("paths " , fmt .Sprintf ("%s (%d) %s (%d)" , fr .Path , fr .RiskScore , tr .Path , tr .RiskScore )))
563603 return true
604+ default :
605+ return false
564606 }
565-
566- return false
567607}
0 commit comments