Skip to content

Commit 1494626

Browse files
authored
Merge pull request #337 from fluxcd/efficient-bucket-download
2 parents 0693289 + d3bcc6a commit 1494626

File tree

10 files changed

+633
-274
lines changed

10 files changed

+633
-274
lines changed

.github/actions/run-tests/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.15-alpine
1+
FROM golang:1.16-alpine
22

33
# Add any build or testing essential system packages
44
RUN apk add --no-cache build-base git pkgconf

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Docker buildkit multi-arch build requires golang alpine
2-
FROM golang:1.15-alpine as builder
2+
FROM golang:1.16-alpine as builder
33

44
RUN apk add gcc pkgconfig libc-dev
55
RUN apk add --no-cache musl~=1.2 libgit2-dev~=1.1

api/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/fluxcd/source-controller/api
22

3-
go 1.15
3+
go 1.16
44

55
require (
66
github.com/fluxcd/pkg/apis/meta v0.8.0

controllers/bucket_controller.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import (
4949
"github.com/fluxcd/pkg/runtime/predicates"
5050

5151
sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
52+
"github.com/fluxcd/source-controller/pkg/sourceignore"
5253
)
5354

5455
// +kubebuilder:rbac:groups=source.toolkit.fluxcd.io,resources=buckets,verbs=get;list;watch;create;update;patch;delete
@@ -202,6 +203,25 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
202203
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
203204
}
204205

206+
// Look for file with ignore rules first
207+
// NB: S3 has flat filepath keys making it impossible to look
208+
// for files in "subdirectories" without building up a tree first.
209+
path := filepath.Join(tempDir, sourceignore.IgnoreFile)
210+
if err := s3Client.FGetObject(ctxTimeout, bucket.Spec.BucketName, sourceignore.IgnoreFile, path, minio.GetObjectOptions{}); err != nil {
211+
if resp, ok := err.(minio.ErrorResponse); ok && resp.Code != "NoSuchKey" {
212+
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
213+
}
214+
}
215+
ps, err := sourceignore.ReadIgnoreFile(path, nil)
216+
if err != nil {
217+
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
218+
}
219+
// In-spec patterns take precedence
220+
if bucket.Spec.Ignore != nil {
221+
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*bucket.Spec.Ignore), nil)...)
222+
}
223+
matcher := sourceignore.NewMatcher(ps)
224+
205225
// download bucket content
206226
for object := range s3Client.ListObjects(ctxTimeout, bucket.Spec.BucketName, minio.ListObjectsOptions{
207227
Recursive: true,
@@ -212,7 +232,11 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
212232
return sourcev1.BucketNotReady(bucket, sourcev1.BucketOperationFailedReason, err.Error()), err
213233
}
214234

215-
if strings.HasSuffix(object.Key, "/") {
235+
if strings.HasSuffix(object.Key, "/") || object.Key == sourceignore.IgnoreFile {
236+
continue
237+
}
238+
239+
if matcher.Match([]string{object.Key}, false) {
216240
continue
217241
}
218242

@@ -255,7 +279,7 @@ func (r *BucketReconciler) reconcile(ctx context.Context, bucket sourcev1.Bucket
255279
defer unlock()
256280

257281
// archive artifact and check integrity
258-
if err := r.Storage.Archive(&artifact, tempDir, bucket.Spec.Ignore); err != nil {
282+
if err := r.Storage.Archive(&artifact, tempDir, nil); err != nil {
259283
err = fmt.Errorf("storage archive error: %w", err)
260284
return sourcev1.BucketNotReady(bucket, sourcev1.StorageOperationFailedReason, err.Error()), err
261285
}

controllers/gitrepository_controller.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"io/ioutil"
2323
"os"
24+
"strings"
2425
"time"
2526

2627
"github.com/go-logr/logr"
@@ -45,6 +46,7 @@ import (
4546
sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
4647
"github.com/fluxcd/source-controller/pkg/git"
4748
"github.com/fluxcd/source-controller/pkg/git/strategy"
49+
"github.com/fluxcd/source-controller/pkg/sourceignore"
4850
)
4951

5052
// +kubebuilder:rbac:groups=source.toolkit.fluxcd.io,resources=gitrepositories,verbs=get;list;watch;create;update;patch;delete
@@ -270,7 +272,15 @@ func (r *GitRepositoryReconciler) reconcile(ctx context.Context, repository sour
270272
defer unlock()
271273

272274
// archive artifact and check integrity
273-
if err := r.Storage.Archive(&artifact, tmpGit, repository.Spec.Ignore); err != nil {
275+
ps, err := sourceignore.LoadIgnorePatterns(tmpGit, nil)
276+
if err != nil {
277+
err = fmt.Errorf(".sourceignore error: %w", err)
278+
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
279+
}
280+
if repository.Spec.Ignore != nil {
281+
ps = append(ps, sourceignore.ReadPatterns(strings.NewReader(*repository.Spec.Ignore), nil)...)
282+
}
283+
if err := r.Storage.Archive(&artifact, tmpGit, SourceIgnoreFilter(ps, nil)); err != nil {
274284
err = fmt.Errorf("storage archive error: %w", err)
275285
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err
276286
}

controllers/storage.go

Lines changed: 62 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ package controllers
1818

1919
import (
2020
"archive/tar"
21-
"bufio"
22-
"bytes"
2321
"compress/gzip"
2422
"crypto/sha1"
2523
"fmt"
@@ -39,14 +37,7 @@ import (
3937

4038
sourcev1 "github.com/fluxcd/source-controller/api/v1beta1"
4139
"github.com/fluxcd/source-controller/internal/fs"
42-
)
43-
44-
const (
45-
excludeFile = ".sourceignore"
46-
excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"
47-
excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip"
48-
excludeCI = ".github/,.circleci/,.travis.yml,.gitlab-ci.yml,appveyor.yml,.drone.yml,cloudbuild.yaml,codeship-services.yml,codeship-steps.yml"
49-
excludeExtra = "**/.goreleaser.yml,**/.sops.yaml,**/.flux.yaml"
40+
"github.com/fluxcd/source-controller/pkg/sourceignore"
5041
)
5142

5243
// Storage manages artifacts
@@ -151,19 +142,35 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
151142
return fi.Mode().IsRegular()
152143
}
153144

154-
// Archive atomically archives the given directory as a tarball to the given v1beta1.Artifact
155-
// path, excluding any VCS specific files and directories, or any of the excludes defined in
156-
// the excludeFiles. If successful, it sets the checksum and last update time on the artifact.
157-
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *string) (err error) {
158-
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
159-
return fmt.Errorf("invalid dir path: %s", dir)
145+
// ArchiveFileFilter must return true if a file should not be included
146+
// in the archive after inspecting the given path and/or os.FileInfo.
147+
type ArchiveFileFilter func(p string, fi os.FileInfo) bool
148+
149+
// SourceIgnoreFilter returns an ArchiveFileFilter that filters out
150+
// files matching sourceignore.VCSPatterns and any of the provided
151+
// patterns. If an empty gitignore.Pattern slice is given, the matcher
152+
// is set to sourceignore.NewDefaultMatcher.
153+
func SourceIgnoreFilter(ps []gitignore.Pattern, domain []string) ArchiveFileFilter {
154+
matcher := sourceignore.NewDefaultMatcher(ps, domain)
155+
if len(ps) > 0 {
156+
ps = append(sourceignore.VCSPatterns(domain), ps...)
157+
matcher = sourceignore.NewMatcher(ps)
158+
}
159+
return func(p string, fi os.FileInfo) bool {
160+
// The isDir argument is always false because the archiver already skips
161+
// directories.
162+
return matcher.Match(strings.Split(p, string(filepath.Separator)), false)
160163
}
164+
}
161165

162-
ps, err := loadExcludePatterns(dir, ignore)
163-
if err != nil {
164-
return err
166+
// Archive atomically archives the given directory as a tarball to the
167+
// given v1beta1.Artifact path, excluding directories and any
168+
// ArchiveFileFilter matches. If successful, it sets the checksum and
169+
// last update time on the artifact.
170+
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, filter ArchiveFileFilter) (err error) {
171+
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
172+
return fmt.Errorf("invalid dir path: %s", dir)
165173
}
166-
matcher := gitignore.NewMatcher(ps)
167174

168175
localPath := s.LocalPath(*artifact)
169176
tf, err := ioutil.TempFile(filepath.Split(localPath))
@@ -182,43 +189,7 @@ func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, ignore *strin
182189

183190
gw := gzip.NewWriter(mw)
184191
tw := tar.NewWriter(gw)
185-
if err := writeToArchiveExcludeMatches(dir, matcher, tw); err != nil {
186-
tw.Close()
187-
gw.Close()
188-
tf.Close()
189-
return err
190-
}
191-
192-
if err := tw.Close(); err != nil {
193-
gw.Close()
194-
tf.Close()
195-
return err
196-
}
197-
if err := gw.Close(); err != nil {
198-
tf.Close()
199-
return err
200-
}
201-
if err := tf.Close(); err != nil {
202-
return err
203-
}
204-
205-
if err := os.Chmod(tmpName, 0644); err != nil {
206-
return err
207-
}
208-
209-
if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
210-
return err
211-
}
212-
213-
artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
214-
artifact.LastUpdateTime = metav1.Now()
215-
return nil
216-
}
217-
218-
// writeToArchiveExcludeMatches walks over the given dir and writes any regular file that does
219-
// not match the given gitignore.Matcher.
220-
func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer *tar.Writer) error {
221-
fn := func(p string, fi os.FileInfo, err error) error {
192+
if err := filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
222193
if err != nil {
223194
return err
224195
}
@@ -228,8 +199,8 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
228199
return nil
229200
}
230201

231-
// Ignore excluded extensions and files
232-
if matcher.Match(strings.Split(p, "/"), false) {
202+
// Skip filtered files
203+
if filter != nil && filter(p, fi) {
233204
return nil
234205
}
235206

@@ -249,7 +220,7 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
249220
}
250221
header.Name = relFilePath
251222

252-
if err := writer.WriteHeader(header); err != nil {
223+
if err := tw.WriteHeader(header); err != nil {
253224
return err
254225
}
255226

@@ -258,13 +229,42 @@ func writeToArchiveExcludeMatches(dir string, matcher gitignore.Matcher, writer
258229
f.Close()
259230
return err
260231
}
261-
if _, err := io.Copy(writer, f); err != nil {
232+
if _, err := io.Copy(tw, f); err != nil {
262233
f.Close()
263234
return err
264235
}
265236
return f.Close()
237+
}); err != nil {
238+
tw.Close()
239+
gw.Close()
240+
tf.Close()
241+
return err
242+
}
243+
244+
if err := tw.Close(); err != nil {
245+
gw.Close()
246+
tf.Close()
247+
return err
248+
}
249+
if err := gw.Close(); err != nil {
250+
tf.Close()
251+
return err
252+
}
253+
if err := tf.Close(); err != nil {
254+
return err
266255
}
267-
return filepath.Walk(dir, fn)
256+
257+
if err := os.Chmod(tmpName, 0644); err != nil {
258+
return err
259+
}
260+
261+
if err := fs.RenameWithFallback(tmpName, localPath); err != nil {
262+
return err
263+
}
264+
265+
artifact.Checksum = fmt.Sprintf("%x", h.Sum(nil))
266+
artifact.LastUpdateTime = metav1.Now()
267+
return nil
268268
}
269269

270270
// AtomicWriteFile atomically writes the io.Reader contents to the v1beta1.Artifact path.
@@ -400,51 +400,6 @@ func (s *Storage) LocalPath(artifact sourcev1.Artifact) string {
400400
return filepath.Join(s.BasePath, artifact.Path)
401401
}
402402

403-
// getPatterns collects ignore patterns from the given reader and returns them
404-
// as a gitignore.Pattern slice.
405-
func getPatterns(reader io.Reader, path []string) []gitignore.Pattern {
406-
var ps []gitignore.Pattern
407-
scanner := bufio.NewScanner(reader)
408-
409-
for scanner.Scan() {
410-
s := scanner.Text()
411-
if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 {
412-
ps = append(ps, gitignore.ParsePattern(s, path))
413-
}
414-
}
415-
416-
return ps
417-
}
418-
419-
// loadExcludePatterns loads the excluded patterns from sourceignore or other
420-
// sources.
421-
func loadExcludePatterns(dir string, ignore *string) ([]gitignore.Pattern, error) {
422-
path := strings.Split(dir, "/")
423-
424-
var ps []gitignore.Pattern
425-
for _, p := range strings.Split(excludeVCS, ",") {
426-
ps = append(ps, gitignore.ParsePattern(p, path))
427-
}
428-
429-
if ignore == nil {
430-
all := strings.Join([]string{excludeExt, excludeCI, excludeExtra}, ",")
431-
for _, p := range strings.Split(all, ",") {
432-
ps = append(ps, gitignore.ParsePattern(p, path))
433-
}
434-
435-
if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil {
436-
defer f.Close()
437-
ps = append(ps, getPatterns(f, path)...)
438-
} else if !os.IsNotExist(err) {
439-
return nil, err
440-
}
441-
} else {
442-
ps = append(ps, getPatterns(bytes.NewBufferString(*ignore), path)...)
443-
}
444-
445-
return ps, nil
446-
}
447-
448403
// newHash returns a new SHA1 hash.
449404
func newHash() hash.Hash {
450405
return sha1.New()

0 commit comments

Comments
 (0)