@@ -19,11 +19,13 @@ import (
1919 "errors"
2020 "fmt"
2121 "io"
22+ "io/fs"
2223 "log/slog"
2324 "os"
2425 "path"
2526 "path/filepath"
2627 "regexp"
28+ "slices"
2729 "strings"
2830 "time"
2931
@@ -389,3 +391,155 @@ func copyFile(dst, src string) (err error) {
389391
390392 return err
391393}
394+
395+ // clean removes files and directories from a root directory based on remove and preserve patterns.
396+ //
397+ // It first determines the paths to remove by applying the removePatterns and then excluding any paths
398+ // that match the preservePatterns. It then separates the remaining paths into files and directories and
399+ // removes them, ensuring that directories are removed last.
400+ //
401+ // This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
402+ func clean (rootDir string , removePatterns , preservePatterns []string ) error {
403+ slog .Info ("cleaning directory" , "path" , rootDir )
404+ finalPathsToRemove , err := deriveFinalPathsToRemove (rootDir , removePatterns , preservePatterns )
405+ if err != nil {
406+ return err
407+ }
408+
409+ filesToRemove , dirsToRemove , err := separateFilesAndDirs (rootDir , finalPathsToRemove )
410+ if err != nil {
411+ return err
412+ }
413+
414+ // Remove files first, then directories.
415+ for _ , file := range filesToRemove {
416+ slog .Info ("removing file" , "path" , file )
417+ if err := os .Remove (filepath .Join (rootDir , file )); err != nil {
418+ return err
419+ }
420+ }
421+
422+ sortDirsByDepth (dirsToRemove )
423+
424+ for _ , dir := range dirsToRemove {
425+ slog .Info ("removing directory" , "path" , dir )
426+ if err := os .Remove (filepath .Join (rootDir , dir )); err != nil {
427+ // It's possible the directory is not empty due to preserved files.
428+ slog .Warn ("failed to remove directory, it may not be empty" , "dir" , dir , "err" , err )
429+ }
430+ }
431+
432+ return nil
433+ }
434+
// sortDirsByDepth reorders dirs in place so that the most deeply nested paths
// come first, which lets callers delete children before their parents. Depth
// is measured as the number of filepath.Separator characters in the path.
func sortDirsByDepth(dirs []string) {
	sep := string(filepath.Separator)
	depth := func(p string) int {
		return strings.Count(p, sep)
	}
	slices.SortFunc(dirs, func(x, y string) int {
		// Positive when x is shallower than y, pushing deeper paths forward.
		return depth(y) - depth(x)
	})
}
441+
// allPaths collects every entry found while walking rootDir with
// filepath.WalkDir and reports each one relative to rootDir. The root itself
// is part of the walk and is therefore reported as ".".
//
// The walk stops at the first error (either one reported by WalkDir or one
// from computing the relative path); the paths gathered up to that point are
// returned together with that error.
func allPaths(rootDir string) ([]string, error) {
	var collected []string
	visit := func(current string, _ fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		rel, relErr := filepath.Rel(rootDir, current)
		if relErr == nil {
			collected = append(collected, rel)
		}
		return relErr
	}
	walkErr := filepath.WalkDir(rootDir, visit)
	return collected, walkErr
}
459+
// filterPaths selects, in their original order, the entries of paths that are
// matched by at least one expression in regexps. Each selected path appears
// once even if several expressions match it. The result is nil when nothing
// is selected.
func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
	matchesAny := func(s string) bool {
		for i := range regexps {
			if regexps[i].MatchString(s) {
				return true
			}
		}
		return false
	}

	var kept []string
	for _, candidate := range paths {
		if !matchesAny(candidate) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
474+
475+ // deriveFinalPathsToRemove determines the final set of paths to be removed. It
476+ // starts with all paths under rootDir, filters them based on removePatterns,
477+ // and then excludes any paths that match preservePatterns.
478+ func deriveFinalPathsToRemove (rootDir string , removePatterns , preservePatterns []string ) ([]string , error ) {
479+ removeRegexps , err := compileRegexps (removePatterns )
480+ if err != nil {
481+ return nil , err
482+ }
483+ preserveRegexps , err := compileRegexps (preservePatterns )
484+ if err != nil {
485+ return nil , err
486+ }
487+
488+ allPaths , err := allPaths (rootDir )
489+ if err != nil {
490+ return nil , err
491+ }
492+
493+ pathsToRemove := filterPaths (allPaths , removeRegexps )
494+ pathsToPreserve := filterPaths (pathsToRemove , preserveRegexps )
495+
496+ // delete pathsToPreserve from pathsToRemove.
497+ pathsToDelete := make (map [string ]bool )
498+ for _ , p := range pathsToPreserve {
499+ pathsToDelete [p ] = true
500+ }
501+ finalPathsToRemove := slices .DeleteFunc (pathsToRemove , func (path string ) bool {
502+ return pathsToDelete [path ]
503+ })
504+ return finalPathsToRemove , nil
505+ }
506+
// separateFilesAndDirs splits paths (interpreted relative to rootDir) into
// non-directories and directories, returned in that order. Entries are
// inspected with os.Lstat, which does not follow symbolic links, so a symlink
// is classified as a file. A path that no longer exists is skipped; any other
// stat failure aborts the call and is returned wrapped, with both slices nil.
func separateFilesAndDirs(rootDir string, paths []string) ([]string, []string, error) {
	var (
		files []string
		dirs  []string
	)
	for _, rel := range paths {
		info, statErr := os.Lstat(filepath.Join(rootDir, rel))
		switch {
		case statErr == nil:
			// Entry exists; classify it below.
		case errors.Is(statErr, os.ErrNotExist):
			// The file or directory may have already been removed.
			continue
		default:
			// Permissions, I/O and similar problems are not recoverable here.
			return nil, nil, fmt.Errorf("failed to stat path %q: %w", rel, statErr)
		}

		if info.IsDir() {
			dirs = append(dirs, rel)
			continue
		}
		files = append(files, rel)
	}
	return files, dirs, nil
}
531+
// compileRegexps compiles every entry of patterns with regexp.Compile and
// returns the compiled expressions in the same order. Compilation stops at
// the first invalid pattern, in which case the result is nil and the error
// names the offending pattern and wraps the compiler's error.
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
	var compiled []*regexp.Regexp
	for i := range patterns {
		expr := patterns[i]
		re, compileErr := regexp.Compile(expr)
		if compileErr != nil {
			return nil, fmt.Errorf("invalid regex %q: %w", expr, compileErr)
		}
		compiled = append(compiled, re)
	}
	return compiled, nil
}
0 commit comments