Skip to content

Commit d41964f

Browse files
author
jguerreiro
committed
feat(sf): add main package
1 parent a44c8d3 commit d41964f

File tree

2 files changed

+284
-1
lines changed

2 files changed

+284
-1
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@
2323
.DS_store
2424

2525
/testdata
26-
src-fingerprint
26+
/src-fingerprint
2727
.cache_ggshield
2828
dist/

cmd/src-fingerprint/main.go

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"os"
7+
"srcfingerprint"
8+
"srcfingerprint/cloner"
9+
"srcfingerprint/exporter"
10+
"srcfingerprint/provider"
11+
"time"
12+
13+
log "github.com/sirupsen/logrus"
14+
"github.com/urfave/cli/v2"
15+
)
16+
17+
// Build metadata reported by the --version flag. All three default to
// "unknown".
// NOTE(review): presumably overridden at build time (e.g. via -ldflags -X) — confirm against the release tooling.
var (
	version = "unknown"
	builtBy = "unknown"
	date    = "unknown"
)
20+
21+
// MaxPipelineEvents is the buffer capacity of the pipeline event channel
// created by runExtract.
const MaxPipelineEvents = 100
22+
23+
func runExtract(pipeline *srcfingerprint.Pipeline, user string, after string) chan srcfingerprint.PipelineEvent {
24+
// buffer it a bit so it won't block if this is going too fast
25+
ch := make(chan srcfingerprint.PipelineEvent, MaxPipelineEvents)
26+
27+
go func(eventChannel chan srcfingerprint.PipelineEvent) {
28+
defer close(eventChannel)
29+
pipeline.ExtractRepositories(user, after, eventChannel)
30+
}(ch)
31+
32+
return ch
33+
}
34+
35+
func getProvider(providerStr string, token string, providerOptions provider.Options) (provider.Provider, error) {
36+
switch providerStr {
37+
case "github":
38+
return provider.NewGitHubProvider(token, providerOptions), nil
39+
case "gitlab":
40+
return provider.NewGitLabProvider(token, providerOptions), nil
41+
case "bitbucket":
42+
return provider.NewBitbucketProvider(token, providerOptions), nil
43+
case "repository":
44+
return provider.NewGenericProvider(providerOptions), nil
45+
default:
46+
return nil, fmt.Errorf("invalid provider string: %s", providerStr)
47+
}
48+
}
49+
50+
func getExporter(exporterStr string, output io.Writer) (exporter.Exporter, error) {
51+
switch exporterStr {
52+
case "json":
53+
return exporter.NewJSONExporter(output), nil
54+
case "jsonl":
55+
return exporter.NewJSONLExporter(output), nil
56+
default:
57+
return nil, fmt.Errorf("invalid export format: %s", exporterStr)
58+
}
59+
}
60+
61+
// authorInfo accumulates per-author statistics while commit events are
// consumed in mainAction.
type authorInfo struct {
	// Name and Email identify the author as reported by the commit event.
	Name, Email string
	// Count is the number of commit events seen for this author.
	Count int
	// LastCommitDate is the most recent commit author time seen, in UTC.
	LastCommitDate time.Time
}
67+
68+
// DefaultClonerN is the default value of the "cloners" command line flag.
const DefaultClonerN = 8
69+
70+
func main() {
71+
cli.VersionFlag = &cli.BoolFlag{
72+
Name: "version",
73+
Usage: "print version",
74+
}
75+
76+
cli.VersionPrinter = func(c *cli.Context) {
77+
log.Printf("src-fingerprint version=%s date=%s builtBy=%s\n", version, date, builtBy)
78+
}
79+
80+
app := &cli.App{
81+
Name: "src-fingerprint",
82+
Version: "unknown",
83+
Usage: "Collect user/organization file hashes from your vcs provider of choice",
84+
Flags: []cli.Flag{
85+
&cli.BoolFlag{
86+
Name: "verbose",
87+
Aliases: []string{"v"},
88+
Value: false,
89+
Usage: "verbose logging",
90+
},
91+
&cli.BoolFlag{
92+
Name: "extract-forks",
93+
Aliases: []string{"e"},
94+
Value: false,
95+
Usage: "extract fork repositories when possible",
96+
},
97+
&cli.BoolFlag{
98+
Name: "skip-archived",
99+
Value: false,
100+
Usage: "skip archived repositories",
101+
},
102+
&cli.StringFlag{
103+
Name: "output",
104+
Aliases: []string{"o"},
105+
Value: "-",
106+
Usage: "set output path to `FILE`. stdout by default",
107+
},
108+
&cli.StringFlag{
109+
Name: "clone-dir",
110+
Value: "-",
111+
Usage: "set cloning location for repositories",
112+
},
113+
&cli.StringFlag{
114+
Name: "after",
115+
Value: "",
116+
Usage: "set a commit date after which we want to collect fileshas",
117+
},
118+
&cli.StringFlag{
119+
Name: "provider",
120+
Aliases: []string{"p"},
121+
Required: true,
122+
Usage: "vcs provider. options: 'gitlab'/'github'/'bitbucket'/'repository'",
123+
},
124+
&cli.StringFlag{
125+
Name: "export-format",
126+
Value: "jsonl",
127+
Usage: "export format: 'jsonl'/'json'. jsonl by default",
128+
},
129+
&cli.StringFlag{
130+
Name: "token",
131+
Aliases: []string{"t"},
132+
Usage: "token for vcs access.",
133+
EnvVars: []string{"VCS_TOKEN", "GITLAB_TOKEN", "GITHUB_TOKEN"},
134+
},
135+
&cli.StringFlag{
136+
Name: "object",
137+
Aliases: []string{"u"},
138+
Usage: "repository|org|group to scrape. If not specified all reachable repositories will be collected.",
139+
},
140+
&cli.IntFlag{
141+
Name: "cloners",
142+
Value: DefaultClonerN,
143+
Usage: "number of cloners, more cloners means more memory usage",
144+
},
145+
&cli.StringFlag{
146+
Name: "provider-url",
147+
Usage: "base URL of the Git provider API. If not set, defaults URL are used.",
148+
},
149+
},
150+
Action: mainAction,
151+
}
152+
153+
if err := app.Run(os.Args); err != nil {
154+
log.Fatal(err)
155+
}
156+
}
157+
158+
func mainAction(c *cli.Context) error {
159+
if c.Bool("verbose") {
160+
log.SetLevel(log.InfoLevel)
161+
} else {
162+
log.SetLevel(log.ErrorLevel)
163+
}
164+
165+
output := os.Stdout
166+
167+
if c.String("output") != "-" {
168+
changedOutput, err := os.OpenFile(c.String("output"), os.O_RDWR|os.O_CREATE, os.ModePerm)
169+
if err != nil {
170+
return cli.Exit(fmt.Sprintf("Could not open output file: %s", err), 1)
171+
}
172+
173+
output = changedOutput
174+
175+
defer output.Close()
176+
}
177+
178+
var srcCloner cloner.Cloner = cloner.NewDiskCloner(c.String("clone-dir"))
179+
180+
providerOptions := provider.Options{
181+
OmitForks: !c.Bool("extract-forks"),
182+
SkipArchived: c.Bool("skip-archived"),
183+
BaseURL: c.String("provider-url"),
184+
}
185+
186+
defer func() {
187+
if r := recover(); r != nil {
188+
log.Errorln(r)
189+
}
190+
}()
191+
192+
srcProvider, err := getProvider(c.String("provider"), c.String("token"), providerOptions)
193+
if err != nil {
194+
cli.ShowAppHelpAndExit(c, 1)
195+
}
196+
197+
outputExporter, err := getExporter(c.String("export-format"), output)
198+
if err != nil {
199+
cli.ShowAppHelpAndExit(c, 1)
200+
}
201+
202+
pipeline := srcfingerprint.Pipeline{
203+
Provider: srcProvider,
204+
Cloner: srcCloner,
205+
Analyzer: &srcfingerprint.Analyzer{},
206+
ClonersCount: c.Int("cloners"),
207+
}
208+
209+
ticker := time.Tick(1 * time.Second)
210+
211+
eventChannel := runExtract(&pipeline, c.String("object"), c.String("after"))
212+
213+
// runtime stats
214+
var (
215+
totalRepo int
216+
doneRepo int
217+
gitFilesCount int
218+
)
219+
220+
authors := make(map[string]*authorInfo)
221+
222+
loop:
223+
for {
224+
select {
225+
case event, opened := <-eventChannel:
226+
if !opened {
227+
break loop
228+
}
229+
230+
switch typedEvent := event.(type) {
231+
case srcfingerprint.RepositoryListPipelineEvent:
232+
totalRepo = len(typedEvent.Repositories)
233+
case srcfingerprint.RepositoryPipelineEvent:
234+
if typedEvent.Finished {
235+
doneRepo++
236+
}
237+
case srcfingerprint.ResultCommitPipelineEvent:
238+
identity := typedEvent.Author.Name + typedEvent.Author.Email
239+
if _, identityExists := authors[identity]; !identityExists {
240+
authors[identity] = &authorInfo{}
241+
}
242+
commit := typedEvent.Commit
243+
authors[identity].Count++
244+
authors[identity].Name = typedEvent.Author.Name
245+
authors[identity].Email = typedEvent.Author.Email
246+
if commit.Author.When.UTC().After(authors[identity].LastCommitDate) {
247+
authors[identity].LastCommitDate = commit.Author.When.UTC()
248+
}
249+
// Collecting gitFiles
250+
case srcfingerprint.ResultGitFilePipelineEvent:
251+
gitFilesCount++
252+
err := outputExporter.AddElement(&exporter.ExportGitFile{
253+
RepositoryName: typedEvent.Repository.GetName(),
254+
RepositoryPrivate: typedEvent.Repository.GetPrivate(),
255+
GitFile: *typedEvent.GitFile,
256+
})
257+
258+
if err != nil {
259+
log.Warnln("unable to export git file", err)
260+
}
261+
}
262+
case <-ticker:
263+
if totalRepo == 0 {
264+
continue
265+
}
266+
267+
log.Infof("%v/%v repos: %v files analyzed\n",
268+
doneRepo, totalRepo, gitFilesCount)
269+
}
270+
}
271+
272+
log.Infof("Final stats:\n%v/%v repos: %v files analyzed\n",
273+
doneRepo, totalRepo, gitFilesCount)
274+
log.Infof("Dumping to output %v\n", c.String("output"))
275+
276+
if err := outputExporter.Close(); err != nil {
277+
log.Errorln("Could not save output", err)
278+
}
279+
280+
log.Infoln("Done")
281+
282+
return nil
283+
}

0 commit comments

Comments
 (0)