Skip to content

Commit 658a058

Browse files
pierrelalanne authored and jguerreiro committed
feat(extractor): add an option --after to select only a given time window in files shas
1 parent 37df96e commit 658a058

File tree

3 files changed

+16
-8
lines changed

3 files changed

+16
-8
lines changed

extractor.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package srcfingerprint
33
import (
44
"bufio"
55
"encoding/json"
6+
"fmt"
67
"os"
78
"os/exec"
89

@@ -30,9 +31,16 @@ type FastExtractor struct {
3031
ChanGitFiles chan *GitFile
3132
}
3233

33-
func (fe *FastExtractor) Run(path string) chan *GitFile {
34+
func (fe *FastExtractor) Run(path string, after string) chan *GitFile {
3435
log.Infof("Extracting commits from path %s\n", path)
35-
cmdBase := "git rev-list --objects --all | git cat-file --batch-check='{\"sha\": \"%(objectname)\", \"type\": \"%(objecttype)\", \"filepath\": \"%(rest)\", \"size\": \"%(objectsize)\"}' | grep '\"type\": \"blob\"'" //nolint
36+
37+
cmdRevList := "git rev-list --objects --all"
38+
39+
if after != "" {
40+
cmdRevList = fmt.Sprintf("git rev-list --objects --all --after '%s'", after)
41+
}
42+
43+
cmdBase := cmdRevList + "| git cat-file --batch-check='{\"sha\": \"%(objectname)\", \"type\": \"%(objecttype)\", \"filepath\": \"%(rest)\", \"size\": \"%(objectsize)\"}' | grep '\"type\": \"blob\"'" //nolint
3644
cmd := exec.Command("bash", "-c", cmdBase)
3745
cmd.Dir = path
3846

pipeline.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ func (p *Pipeline) gather(
9090
}
9191

9292
// ExtractRepository extracts for a single repository.
93-
func (p *Pipeline) ExtractRepository(repository provider.GitRepository, eventChan chan<- PipelineEvent) error {
93+
func (p *Pipeline) ExtractRepository(repository provider.GitRepository, after string, eventChan chan<- PipelineEvent) error { // nolint
9494
defer p.publishEvent(eventChan, RepositoryPipelineEvent{true, repository.GetPrivate(), repository.GetName()})
9595

9696
log.Infof("Cloning repo %v\n", repository.GetName())
@@ -103,7 +103,7 @@ func (p *Pipeline) ExtractRepository(repository provider.GitRepository, eventCha
103103
log.Infof("Cloned repo %v (size: %v KB)\n", repository.GetName(), repository.GetStorageSize())
104104

105105
extractorGitFile := NewFastExtractor()
106-
extractorGitFile.Run(gitRepository)
106+
extractorGitFile.Run(gitRepository, after)
107107

108108
for gitFile := range extractorGitFile.ChanGitFiles {
109109
p.publishEvent(eventChan, ResultGitFilePipelineEvent{repository, gitFile})
@@ -119,7 +119,7 @@ const (
119119
)
120120

121121
// ExtractRepositories extract repositories and analyze it for a given user and provider.
122-
func (p *Pipeline) ExtractRepositories(user string, eventChan chan<- PipelineEvent) {
122+
func (p *Pipeline) ExtractRepositories(user string, after string, eventChan chan<- PipelineEvent) {
123123
log.Infof("Extracting user %v\n", user)
124124

125125
repositoryChannel := make(chan provider.GitRepository)
@@ -142,7 +142,7 @@ func (p *Pipeline) ExtractRepositories(user string, eventChan chan<- PipelineEve
142142
defer wg.Done()
143143

144144
for repository := range repositoryChannel {
145-
if err := p.ExtractRepository(repository, eventChan); err != nil {
145+
if err := p.ExtractRepository(repository, after, eventChan); err != nil {
146146
log.Errorf("extracting %v failed: %v\n", repository.GetName(), err)
147147
}
148148
}

pipeline_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func (suite *PipelineTestSuite) TestExtractGitRepository() {
9696
go func() {
9797
defer close(eventChan)
9898

99-
pipeline.ExtractRepository(repository, eventChan)
99+
pipeline.ExtractRepository(repository, "", eventChan)
100100
}()
101101

102102
events := make([]PipelineEvent, 0)
@@ -136,7 +136,7 @@ func (suite *PipelineTestSuite) TestExtractRepositories() {
136136
go func() {
137137
defer close(eventChan)
138138

139-
pipeline.ExtractRepositories("user", eventChan)
139+
pipeline.ExtractRepositories("user", "", eventChan)
140140
}()
141141

142142
events := make([]PipelineEvent, 0)

0 commit comments

Comments
 (0)