Skip to content

Commit 0c0d4ef

Browse files
author
jguerreiro
committed
feat(cloner): add native git cloning
1 parent 9d50e21 commit 0c0d4ef

File tree

11 files changed

+117
-99
lines changed

11 files changed

+117
-99
lines changed

README.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,7 @@ The cmd/src-fingerprint package contains the binary code. It reads from CLI and
137137

138138
#### Cloning
139139

140-
- go-git: https://github.com/src-d/go-git
140+
- native wrapped git command
141141

142-
### Issues
143-
144-
- Repo size seems not to work on go gitlab wrapper.
145-
- Channels are cheap. Complex design overloading semantics isn't.
142+
Using go-git resulted in in-memory cloning (stream to memory and then to directory).
143+
This caused memory usage peaks that were too high for small VMs.

cloner/cloner.go

Lines changed: 77 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,95 @@
11
package cloner
22

33
import (
4-
"io/ioutil"
4+
"bytes"
5+
"errors"
6+
"os"
7+
"os/exec"
8+
"path/filepath"
9+
"strings"
510

6-
"gopkg.in/src-d/go-billy.v4/osfs"
7-
git "gopkg.in/src-d/go-git.v4"
8-
"gopkg.in/src-d/go-git.v4/plumbing/cache"
9-
"gopkg.in/src-d/go-git.v4/plumbing/transport"
10-
"gopkg.in/src-d/go-git.v4/storage/filesystem"
11+
log "github.com/sirupsen/logrus"
1112
)
1213

14+
const gitExitUnclean = 128
15+
1316
// Cloner represents a cloner of git repository.
1417
type Cloner interface {
15-
CloneRepository(url string, auth transport.AuthMethod) (*git.Repository, error)
18+
CloneRepository(url string) (string, error)
1619
}
1720

1821
// DiskCloner clones a git repository on disk into a temporary directory.
19-
type DiskCloner struct{}
22+
// DiskCloner clones git repositories on disk, each into its own temporary
// directory created under BaseDir.
type DiskCloner struct {
	// BaseDir is the parent directory in which the temporary clone
	// directories are created.
	BaseDir string
}

// NewDiskCloner creates a new DiskCloner.
//
// The base directory is chosen in this order:
//  1. baseDir, when it is non-empty and refers to an existing directory;
//  2. the user cache directory + "/srcfingerprint", created if missing;
//  3. /tmp, when neither of the above is usable.
func NewDiskCloner(baseDir string) *DiskCloner {
	if baseDir != "" {
		// Require a successful Stat on a directory: a path that cannot be
		// inspected, or that is a regular file, cannot host clone directories.
		if info, err := os.Stat(baseDir); err == nil && info.IsDir() {
			return &DiskCloner{BaseDir: baseDir}
		}
	}

	if cacheDir, err := os.UserCacheDir(); err == nil {
		cacheDir = filepath.Join(cacheDir, "srcfingerprint")

		// os.MkdirTemp does not create its parent directory, so make sure the
		// cache folder exists before handing it out. Clones may carry private
		// source code, hence the owner-only permissions.
		if err := os.MkdirAll(cacheDir, 0o700); err == nil {
			return &DiskCloner{BaseDir: cacheDir}
		}
	}

	return &DiskCloner{BaseDir: "/tmp"}
}
2049

2150
// CloneRepository clones a git repository given its information.
22-
func (*DiskCloner) CloneRepository(url string, auth transport.AuthMethod) (*git.Repository, error) {
23-
tmpDir, err := ioutil.TempDir("", "fs-")
51+
func (d *DiskCloner) CloneRepository(url string) (string, error) {
52+
tmpDir, err := os.MkdirTemp(d.BaseDir, "srcfingerprint-")
2453
if err != nil {
25-
return nil, err
54+
return "", err
2655
}
2756

28-
fs := osfs.New(tmpDir)
57+
if err := cloneGitRepository(tmpDir, url); err != nil {
58+
os.RemoveAll(tmpDir)
59+
60+
return "", err
61+
}
62+
63+
return tmpDir, nil
64+
}
65+
66+
func cloneGitRepository(destDir, gitRepoURL string) error {
67+
var outbuf, errbuf bytes.Buffer
68+
// git clone github.com/author/name.git /tmp/workdir/author-name/clone
69+
cmd := exec.Command("git", "clone", gitRepoURL, destDir)
70+
cmd.Stdout = &outbuf
71+
cmd.Stderr = &errbuf
72+
73+
cmd.Env = append(os.Environ(), "GIT_TERMINAL_PROMPT=0")
74+
75+
err := cmd.Run()
76+
if exitError, ok := err.(*exec.ExitError); ok {
77+
stderr := strings.TrimSpace(errbuf.String())
78+
79+
if exitError.ExitCode() == gitExitUnclean {
80+
log.WithError(err).WithFields(log.Fields{
81+
"op": "gitError",
82+
"stderr": stderr,
83+
}).WithField("url", gitRepoURL).Warnf("missing repo")
84+
} else {
85+
log.WithError(err).WithFields(log.Fields{
86+
"op": "gitError",
87+
"stderr": stderr,
88+
}).WithField("url", gitRepoURL).Errorf("unhandled git error")
89+
}
90+
91+
return errors.New("")
92+
}
2993

30-
return git.Clone(filesystem.NewStorage(fs, cache.NewObjectLRUDefault()), nil, &git.CloneOptions{
31-
URL: url,
32-
Progress: ioutil.Discard,
33-
Auth: auth,
34-
})
94+
return err
3595
}

extractor.go

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,12 @@ package srcfingerprint
33
import (
44
"bufio"
55
"encoding/json"
6-
"errors"
76
"os"
87
"os/exec"
98

109
log "github.com/sirupsen/logrus"
11-
"gopkg.in/src-d/go-billy.v4/helper/chroot"
12-
git "gopkg.in/src-d/go-git.v4"
13-
"gopkg.in/src-d/go-git.v4/storage/filesystem"
1410
)
1511

16-
func GetBasePathGoGitRepo(r *git.Repository) (string, error) {
17-
// Try to grab the repository Storer
18-
s, ok := r.Storer.(*filesystem.Storage)
19-
if !ok {
20-
return "", errors.New("repository storage is not filesystem.Storage")
21-
}
22-
23-
// Try to get the underlying billy.Filesystem
24-
fs, ok := s.Filesystem().(*chroot.ChrootHelper)
25-
if !ok {
26-
return "", errors.New("filesystem is not chroot.ChrootHelper")
27-
}
28-
29-
return fs.Root(), nil
30-
}
31-
3212
type BaseExtractor interface {
3313
Next() (interface{}, error)
3414
}
@@ -50,25 +30,15 @@ type FastExtractor struct {
5030
ChanGitFiles chan *GitFile
5131
}
5232

53-
func (fe *FastExtractor) Run(repository *git.Repository) chan *GitFile {
54-
// https://gist.github.com/ochinchina/9e409a88e77c3cfd94c3
55-
path, err := GetBasePathGoGitRepo(repository)
56-
if err != nil {
57-
log.Fatal(err)
58-
}
59-
60-
err = os.Chdir(path)
61-
if err != nil {
62-
log.Fatal(err)
63-
}
64-
33+
func (fe *FastExtractor) Run(path string) chan *GitFile {
6534
log.Infof("Extracting commits from path %s", path)
6635
cmdBase := "git rev-list --objects --all | git cat-file --batch-check='{\"sha\": \"%(objectname)\", \"type\": \"%(objecttype)\", \"filepath\": \"%(rest)\", \"size\": \"%(objectsize:disk)\"}' | grep '\"type\": \"blob\"'" //nolint
6736
cmd := exec.Command("bash", "-c", cmdBase)
37+
cmd.Dir = path
6838

6939
stdout, err := cmd.StdoutPipe()
7040
if err != nil {
71-
log.Fatal(err)
41+
log.Fatalln(err)
7242
}
7343

7444
err = cmd.Start()

go.mod

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@ require (
1919
github.com/urfave/cli/v2 v2.3.0
2020
github.com/xanzy/go-gitlab v0.49.0
2121
github.com/xanzy/ssh-agent v0.3.0 // indirect
22-
golang.org/x/crypto v0.0.0-20210503195802-e9a32991a82e // indirect
23-
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 // indirect
22+
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf // indirect
23+
golang.org/x/net v0.0.0-20210510120150-4163338589ed // indirect
2424
golang.org/x/oauth2 v0.0.0-20210427180440-81ed05c6b58c
25-
golang.org/x/sys v0.0.0-20210503173754-0981d6026fa6 // indirect
25+
golang.org/x/sys v0.0.0-20210510120138-977fb7262007 // indirect
2626
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect
2727
google.golang.org/appengine v1.6.7 // indirect
28-
gopkg.in/src-d/go-billy.v4 v4.3.2
2928
gopkg.in/src-d/go-git.v4 v4.13.1
3029
)

go.sum

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U
207207
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
208208
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
209209
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
210-
golang.org/x/crypto v0.0.0-20210503195802-e9a32991a82e h1:8foAy0aoO5GkqCvAEJ4VC4P3zksTg4X4aJCDpZzmgQI=
211-
golang.org/x/crypto v0.0.0-20210503195802-e9a32991a82e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
210+
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf h1:B2n+Zi5QeYRDAEodEu72OS36gmTWjgpXr2+cWcBW90o=
211+
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
212212
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
213213
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
214214
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -267,8 +267,8 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R
267267
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
268268
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
269269
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
270-
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125 h1:Ugb8sMTWuWRC3+sz5WeN/4kejDx9BvIwnPUiJBjJE+8=
271-
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
270+
golang.org/x/net v0.0.0-20210510120150-4163338589ed h1:p9UgmWI9wKpfYmgaV/IZKGdXc5qEK45tDwwwDyjS26I=
271+
golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
272272
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
273273
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
274274
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -318,8 +318,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w
318318
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
319319
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
320320
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
321-
golang.org/x/sys v0.0.0-20210503173754-0981d6026fa6 h1:cdsMqa2nXzqlgs183pHxtvoVwU7CyzaCTAUOg94af4c=
322-
golang.org/x/sys v0.0.0-20210503173754-0981d6026fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
321+
golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE=
322+
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
323323
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
324324
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
325325
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

pipeline_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ type ProviderMock struct {
2424

2525
func (mock *ProviderMock) Gather(user string) ([]provider.GitRepository, error) {
2626
args := mock.Called(user)
27+
2728
return args.Get(0).([]provider.GitRepository), args.Error(1)
2829
}
2930

30-
func (mock *ProviderMock) CloneRepository(cloner cloner.Cloner, repository provider.GitRepository) (*git.Repository, error) {
31+
func (mock *ProviderMock) CloneRepository(cloner cloner.Cloner, repository provider.GitRepository) (string, error) {
3132
args := mock.Called(cloner, repository)
32-
return args.Get(0).(*git.Repository), args.Error(1)
33+
34+
return args.String(0), args.Error(1)
3335
}
3436

3537
type gitRepositoryMock struct{ name string }

provider/bitbucket.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ import (
1212

1313
log "github.com/sirupsen/logrus"
1414
"github.com/suhaibmujahid/go-bitbucket-server/bitbucket"
15-
git "gopkg.in/src-d/go-git.v4"
16-
httpGit "gopkg.in/src-d/go-git.v4/plumbing/transport/http"
1715
)
1816

1917
// Provider is capable of gathering Bitbucket server repositories from an org.
@@ -178,11 +176,12 @@ func (p *BitbucketProvider) Gather(user string) ([]GitRepository, error) {
178176

179177
// CloneRepository clones a Bitbucket repository given the token. The token must have the `read_repository` rights.
180178
func (p *BitbucketProvider) CloneRepository(cloner cloner.Cloner,
181-
repository GitRepository) (*git.Repository, error) {
182-
auth := &httpGit.BasicAuth{
183-
Username: p.transport.user,
184-
Password: p.token,
179+
repository GitRepository) (string, error) {
180+
url := repository.GetHTTPUrl()
181+
// If token doesn't exist, don't try to basic auth
182+
if p.token != "" {
183+
url = strings.Replace(url, "https://", fmt.Sprintf("https://%s:%s@", p.transport.user, p.token), 1)
185184
}
186185

187-
return cloner.CloneRepository(repository.GetHTTPUrl(), auth)
186+
return cloner.CloneRepository(url)
188187
}

provider/generic_repository.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ import (
44
"errors"
55
"srcfingerprint/cloner"
66
"time"
7-
8-
git "gopkg.in/src-d/go-git.v4"
97
)
108

119
// Generic Repository Structure.
@@ -59,6 +57,6 @@ func (p *GenericProvider) Gather(user string) ([]GitRepository, error) {
5957

6058
func (p *GenericProvider) CloneRepository(
6159
cloner cloner.Cloner,
62-
repository GitRepository) (*git.Repository, error) {
63-
return cloner.CloneRepository(repository.GetHTTPUrl(), nil)
60+
repository GitRepository) (string, error) {
61+
return cloner.CloneRepository(repository.GetHTTPUrl())
6462
}

provider/github.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,12 @@ import (
55
"fmt"
66
"net/url"
77
"srcfingerprint/cloner"
8+
"strings"
89
"sync"
910

1011
"github.com/google/go-github/github"
1112
log "github.com/sirupsen/logrus"
1213
"golang.org/x/oauth2"
13-
git "gopkg.in/src-d/go-git.v4"
14-
"gopkg.in/src-d/go-git.v4/plumbing/transport/http"
1514
)
1615

1716
const (
@@ -142,11 +141,12 @@ func (p *GitHubProvider) Gather(user string) ([]GitRepository, error) {
142141

143142
// CloneRepository clones a GitHub repository given the token. The token must have the `read_repository` rights.
144143
func (p *GitHubProvider) CloneRepository(cloner cloner.Cloner,
145-
repository GitRepository) (*git.Repository, error) {
146-
auth := &http.BasicAuth{
147-
Username: p.token,
148-
Password: p.token,
144+
repository GitRepository) (string, error) {
145+
url := repository.GetHTTPUrl()
146+
// If token doesn't exist, don't try to basic auth
147+
if p.token != "" {
148+
url = strings.Replace(url, "https://", fmt.Sprintf("https://x-access-token:%s@", p.token), 1)
149149
}
150150

151-
return cloner.CloneRepository(repository.GetHTTPUrl(), auth)
151+
return cloner.CloneRepository(url)
152152
}

provider/gitlab.go

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ import (
99

1010
log "github.com/sirupsen/logrus"
1111
gitlab "github.com/xanzy/go-gitlab"
12-
git "gopkg.in/src-d/go-git.v4"
13-
"gopkg.in/src-d/go-git.v4/plumbing/transport/http"
1412
)
1513

1614
const (
@@ -239,16 +237,12 @@ func (p *GitLabProvider) collectFromGroup(repositories []GitRepository,
239237
// CloneRepository clones a GitLab repository given the token. The token must have the `read_repository` rights.
240238
func (p *GitLabProvider) CloneRepository(
241239
cloner cloner.Cloner,
242-
repository GitRepository) (*git.Repository, error) {
243-
auth := &http.BasicAuth{
244-
Username: p.token,
245-
Password: p.token,
246-
}
247-
240+
repository GitRepository) (string, error) {
241+
url := repository.GetHTTPUrl()
248242
// If token doesn't exist, don't try to basic auth
249-
if p.token == "" {
250-
auth = nil
243+
if p.token != "" {
244+
url = strings.Replace(url, "https://", fmt.Sprintf("https://%s:%s@", p.token, p.token), 1)
251245
}
252246

253-
return cloner.CloneRepository(repository.GetHTTPUrl(), auth)
247+
return cloner.CloneRepository(url)
254248
}

0 commit comments

Comments
 (0)