Skip to content

Commit 160f1a4

Browse files
authored
Export git.handleBinary and getSafeRemoteURL (#3921)
This PR exports two pieces of functionality from the git source so that they are usable in a new (under development) git-flavored source that does not need to wrap an entire git source in order to operate. The first is getSafeRemoteURL (now GetSafeRemoteURL); this is a straightforward change. The second is handleBinary (now HandleBinary). Right now, all git binary file handling is invoked from Git.ScanRepo, which is itself invoked by our various git-flavored sources. Since the actual binary file handling function handleBinary only used two pieces of information from the "git" source, I just removed its receiver. One of those pieces of information was a flag (skipBinaries) that caused the function to be skipped entirely; I moved that check to the two call sites (ScanCommits and ScanStaged). The other (skipArchives) I just forwarded as an argument.
1 parent b332fea commit 160f1a4

File tree

1 file changed

+27
-15
lines changed

1 file changed

+27
-15
lines changed

pkg/sources/git/git.go

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ func getGitDir(path string, options *ScanOptions) string {
543543

544544
func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
545545
// Get the remote URL for reporting (may be empty)
546-
remoteURL := getSafeRemoteURL(repo, "origin")
546+
remoteURL := GetSafeRemoteURL(repo, "origin")
547547
var repoCtx context.Context
548548

549549
if ctx.Value("repo") == nil {
@@ -642,6 +642,15 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
642642

643643
// Handle binary files by reading the entire file rather than using the diff.
644644
if diff.IsBinary {
645+
commitHash := plumbing.NewHash(fullHash)
646+
647+
if s.skipBinaries || feature.ForceSkipBinaries.Load() {
648+
logger.V(5).Info("skipping binary file",
649+
"commit", commitHash.String()[:7],
650+
"path", path)
651+
continue
652+
}
653+
645654
metadata := s.sourceMetadataFunc(fileName, email, fullHash, when, remoteURL, 0)
646655
chunkSkel := &sources.Chunk{
647656
SourceName: s.sourceName,
@@ -652,8 +661,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
652661
Verify: s.verify,
653662
}
654663

655-
commitHash := plumbing.NewHash(fullHash)
656-
if err := s.handleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName); err != nil {
664+
if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil {
657665
logger.Error(
658666
err,
659667
"error handling binary file",
@@ -793,7 +801,7 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email
793801
// ScanStaged chunks staged changes.
794802
func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
795803
// Get the URL metadata for reporting (may be empty).
796-
urlMetadata := getSafeRemoteURL(repo, "origin")
804+
urlMetadata := GetSafeRemoteURL(repo, "origin")
797805

798806
diffChan, err := s.parser.Staged(ctx, path)
799807
if err != nil {
@@ -864,6 +872,14 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
864872
// Handle binary files by reading the entire file rather than using the diff.
865873
if diff.IsBinary {
866874
commitHash := plumbing.NewHash(fullHash)
875+
876+
if s.skipBinaries || feature.ForceSkipBinaries.Load() {
877+
logger.V(5).Info("skipping binary file",
878+
"commit", commitHash.String()[:7],
879+
"path", path)
880+
continue
881+
}
882+
867883
metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, 0)
868884
chunkSkel := &sources.Chunk{
869885
SourceName: s.sourceName,
@@ -873,7 +889,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
873889
SourceMetadata: metadata,
874890
Verify: s.verify,
875891
}
876-
if err := s.handleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName); err != nil {
892+
if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil {
877893
logger.Error(err, "error handling binary file")
878894
}
879895
continue
@@ -938,7 +954,7 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin
938954
remotes, _ := repo.Remotes()
939955
repoURL := "Could not get remote for repo"
940956
if len(remotes) != 0 {
941-
repoURL = getSafeRemoteURL(repo, remotes[0].Config().Name)
957+
repoURL = GetSafeRemoteURL(repo, remotes[0].Config().Name)
942958
}
943959
logger = logger.WithValues("repo", repoURL)
944960
}
@@ -1190,10 +1206,10 @@ func PrepareRepo(ctx context.Context, uriString string) (string, bool, error) {
11901206
return path, remote, nil
11911207
}
11921208

1193-
// getSafeRemoteURL is a helper function that will attempt to get a safe URL first
1209+
// GetSafeRemoteURL is a helper function that will attempt to get a safe URL first
11941210
// from the preferred remote name, falling back to the first remote name
11951211
// available, or an empty string if there are no remotes.
1196-
func getSafeRemoteURL(repo *git.Repository, preferred string) string {
1212+
func GetSafeRemoteURL(repo *git.Repository, preferred string) string {
11971213
remote, err := repo.Remote(preferred)
11981214
if err != nil {
11991215
var remotes []*git.Remote
@@ -1213,13 +1229,14 @@ func getSafeRemoteURL(repo *git.Repository, preferred string) string {
12131229
return safeURL
12141230
}
12151231

1216-
func (s *Git) handleBinary(
1232+
func HandleBinary(
12171233
ctx context.Context,
12181234
gitDir string,
12191235
reporter sources.ChunkReporter,
12201236
chunkSkel *sources.Chunk,
12211237
commitHash plumbing.Hash,
12221238
path string,
1239+
skipArchives bool,
12231240
) (err error) {
12241241
fileCtx := context.WithValues(ctx, "commit", commitHash.String()[:7], "path", path)
12251242
fileCtx.Logger().V(5).Info("handling binary file")
@@ -1229,11 +1246,6 @@ func (s *Git) handleBinary(
12291246
return nil
12301247
}
12311248

1232-
if s.skipBinaries || feature.ForceSkipBinaries.Load() {
1233-
fileCtx.Logger().V(5).Info("skipping binary file", "path", path)
1234-
return nil
1235-
}
1236-
12371249
const (
12381250
cmdTimeout = 60 * time.Second
12391251
waitDelay = 5 * time.Second
@@ -1293,7 +1305,7 @@ func (s *Git) handleBinary(
12931305
err = errors.Join(err, copyErr, waitErr)
12941306
}()
12951307

1296-
return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))
1308+
return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(skipArchives))
12971309
}
12981310

12991311
func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {

0 commit comments

Comments
 (0)