Skip to content

Commit 584a5a5

Browse files
feat: improve bundle performance (#94)
1 parent 4150ed0 commit 584a5a5

File tree

10 files changed

+127
-64
lines changed

10 files changed

+127
-64
lines changed

bundle/bundle.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package bundle
1919
import (
2020
"context"
2121
"fmt"
22-
2322
"github.com/rs/zerolog"
2423

2524
"github.com/snyk/code-client-go/internal/deepcode"
@@ -31,6 +30,7 @@ type Bundle interface {
3130
UploadBatch(ctx context.Context, requestId string, batch *Batch) error
3231
GetBundleHash() string
3332
GetFiles() map[string]deepcode.BundleFile
33+
ClearFiles()
3434
GetMissingFiles() []string
3535
GetLimitToFiles() []string
3636
GetRootPath() string
@@ -82,6 +82,10 @@ func (b *deepCodeBundle) GetFiles() map[string]deepcode.BundleFile {
8282
return b.files
8383
}
8484

85+
func (b *deepCodeBundle) ClearFiles() {
86+
b.files = make(map[string]deepcode.BundleFile)
87+
}
88+
8589
func (b *deepCodeBundle) GetMissingFiles() []string {
8690
return b.missingFiles
8791
}
@@ -143,7 +147,6 @@ func NewBatchFromRawContent(documents map[string][]byte) (*Batch, error) {
143147
if err != nil {
144148
return nil, fmt.Errorf("failed to create file from raw data: %v", err)
145149
}
146-
147150
bundleFiles[key] = bundleFile
148151
}
149152

@@ -154,15 +157,15 @@ func NewBatchFromRawContent(documents map[string][]byte) (*Batch, error) {
154157

155158
// todo simplify the size computation
156159
// maybe consider an addFile / canFitFile interface with proper error handling
157-
func (b *Batch) canFitFile(uri string, content []byte) bool {
158-
docPayloadSize := b.getTotalDocPayloadSize(uri, content)
160+
func (b *Batch) canFitFile(uri string, contentSize int) bool {
161+
docPayloadSize := b.getTotalDocPayloadSize(uri, contentSize)
159162
newSize := docPayloadSize + b.getSize()
160163
b.size += docPayloadSize
161164
return newSize < maxUploadBatchSize
162165
}
163166

164-
func (b *Batch) getTotalDocPayloadSize(documentURI string, content []byte) int {
165-
return len(jsonHashSizePerFile) + len(jsonOverheadPerFile) + len([]byte(documentURI)) + len(content)
167+
func (b *Batch) getTotalDocPayloadSize(documentURI string, contentSize int) int {
168+
return len(jsonHashSizePerFile) + len(jsonOverheadPerFile) + len([]byte(documentURI)) + contentSize
166169
}
167170

168171
func (b *Batch) getSize() int {

bundle/bundle_manager.go

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,30 +106,33 @@ func (b *bundleManager) Create(ctx context.Context,
106106
if !supported {
107107
continue
108108
}
109-
var rawContent []byte
110-
rawContent, err = os.ReadFile(absoluteFilePath)
111-
if err != nil {
112-
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("could not load content of file")
109+
110+
fileInfo, fileErr := os.Stat(absoluteFilePath)
111+
if fileErr != nil {
112+
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("Failed to read file info")
113113
continue
114114
}
115115

116-
bundleFile, bundleError := deepcode.BundleFileFrom(rawContent)
117-
if bundleError != nil {
118-
b.logger.Error().Err(bundleError).Str("filePath", absoluteFilePath).Msg("could not convert content of file to UTF-8")
116+
if fileInfo.Size() == 0 || fileInfo.Size() > maxFileSize {
119117
continue
120118
}
121119

122-
if !(len(bundleFile.Content) > 0 && len(bundleFile.Content) <= maxFileSize) {
120+
fileContent, fileErr := os.ReadFile(absoluteFilePath)
121+
if fileErr != nil {
122+
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("Failed to load content of file")
123123
continue
124124
}
125125

126-
var relativePath string
127-
relativePath, err = util.ToRelativeUnixPath(rootPath, absoluteFilePath)
128-
if err != nil {
126+
relativePath, fileErr := util.ToRelativeUnixPath(rootPath, absoluteFilePath)
127+
if fileErr != nil {
129128
b.errorReporter.CaptureError(err, observability.ErrorReporterOptions{ErrorDiagnosticPath: rootPath})
130129
}
131130
relativePath = util.EncodePath(relativePath)
132131

132+
bundleFile, fileErr := deepcode.BundleFileFrom(fileContent)
133+
if fileErr != nil {
134+
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("Error creating bundle file")
135+
}
133136
bundleFiles[relativePath] = bundleFile
134137
fileHashes[relativePath] = bundleFile.Hash
135138
b.logger.Trace().Str("method", "BundleFileFrom").Str("hash", bundleFile.Hash).Str("filePath", absoluteFilePath).Msg("")
@@ -148,6 +151,7 @@ func (b *bundleManager) Create(ctx context.Context,
148151
if len(fileHashes) > 0 {
149152
bundleHash, missingFiles, err = b.deepcodeClient.CreateBundle(span.Context(), fileHashes)
150153
}
154+
151155
bundle = NewBundle(
152156
b.deepcodeClient,
153157
b.instrumentor,
@@ -187,16 +191,44 @@ func (b *bundleManager) Upload(
187191
if err := ctx.Err(); err != nil {
188192
return bundle, err
189193
}
194+
b.enrichBatchWithFileContent(batch, bundle.GetRootPath())
190195
err := bundle.UploadBatch(s.Context(), requestId, batch)
191196
if err != nil {
192197
return bundle, err
193198
}
199+
batch.documents = make(map[string]deepcode.BundleFile)
194200
}
195201
}
196202

203+
// bundle doesn't need file map anymore since they are already grouped and uploaded
204+
bundle.ClearFiles()
197205
return bundle, nil
198206
}
199207

208+
func (b *bundleManager) enrichBatchWithFileContent(batch *Batch, rootPath string) {
209+
for filePath, bundleFile := range batch.documents {
210+
absPath, err := util.DecodePath(util.ToAbsolutePath(rootPath, filePath))
211+
if err != nil {
212+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to decode Path")
213+
continue
214+
}
215+
content, err := os.ReadFile(absPath)
216+
if err != nil {
217+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to read bundle file")
218+
continue
219+
}
220+
221+
utf8Content, err := util.ConvertToUTF8(content)
222+
if err != nil {
223+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to convert bundle file to UTF-8")
224+
continue
225+
}
226+
227+
bundleFile.Content = string(utf8Content)
228+
batch.documents[filePath] = bundleFile
229+
}
230+
}
231+
200232
func (b *bundleManager) groupInBatches(
201233
ctx context.Context,
202234
bundle Bundle,
@@ -218,12 +250,11 @@ func (b *bundleManager) groupInBatches(
218250
}
219251

220252
file := files[filePath]
221-
var fileContent = []byte(file.Content)
222-
if batch.canFitFile(filePath, fileContent) {
223-
b.logger.Trace().Str("path", filePath).Int("size", len(fileContent)).Msgf("added to deepCodeBundle #%v", len(batches))
253+
if batch.canFitFile(filePath, file.ContentSize) {
254+
b.logger.Trace().Str("path", filePath).Int("size", file.ContentSize).Msgf("added to deepCodeBundle #%v", len(batches))
224255
batch.documents[filePath] = file
225256
} else {
226-
b.logger.Trace().Str("path", filePath).Int("size", len(fileContent)).Msgf("created new deepCodeBundle - %v bundles in this upload so far", len(batches))
257+
b.logger.Trace().Str("path", filePath).Int("size", file.ContentSize).Msgf("created new deepCodeBundle - %v bundles in this upload so far", len(batches))
227258
newUploadBatch := NewBatch(map[string]deepcode.BundleFile{})
228259
newUploadBatch.documents[filePath] = file
229260
batches = append(batches, newUploadBatch)

bundle/bundle_manager_test.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
func Test_Create(t *testing.T) {
4141
t.Run(
4242
"when < maxFileSize creates deepCodeBundle", func(t *testing.T) {
43+
testBundleHash := "test-bundle-hash-123"
4344
ctrl := gomock.NewController(t)
4445
mockSpan := mocks.NewMockSpan(ctrl)
4546
mockSpan.EXPECT().Context().AnyTimes()
@@ -50,7 +51,7 @@ func Test_Create(t *testing.T) {
5051
}, nil)
5152
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), map[string]string{
5253
"file.java": "386f1997f6da5133a0f75c347d5cdff15a428b817231278e2509832c1a80b3ea",
53-
}).Times(1)
54+
}).Return(testBundleHash, []string{}, nil).Times(1)
5455
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
5556
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
5657
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -75,6 +76,7 @@ func Test_Create(t *testing.T) {
7576
map[string]bool{})
7677
require.NoError(t, err)
7778
assert.Len(t, bundle.GetFiles(), 1, "deepCodeBundle should have 1 deepCodeBundle files")
79+
assert.Equal(t, testBundleHash, bundle.GetBundleHash(), "Bundle should have valid hash")
7880
},
7981
)
8082

@@ -88,6 +90,8 @@ func Test_Create(t *testing.T) {
8890
ConfigFiles: []string{},
8991
Extensions: []string{".java"},
9092
}, nil)
93+
// Bundle creation should only occur if we have candidate files. If all files are too big, we skip creation.
94+
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), gomock.Any()).Times(0)
9195
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
9296
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
9397
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -126,6 +130,8 @@ func Test_Create(t *testing.T) {
126130
ConfigFiles: []string{},
127131
Extensions: []string{".java"},
128132
}, nil)
133+
// Bundle creation should only occur if we have candidate files. If all files are skipped we skip creation.
134+
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), gomock.Any()).Times(0)
129135
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
130136
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
131137
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -168,6 +174,8 @@ func Test_Create(t *testing.T) {
168174
ConfigFiles: []string{},
169175
Extensions: []string{".java"},
170176
}, nil)
177+
// Bundle creation should only occur if we have candidate files. If all files are ignored, we skip creation.
178+
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), gomock.Any()).Times(0)
171179
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
172180
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
173181
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -209,7 +217,7 @@ func Test_Create(t *testing.T) {
209217
}, nil)
210218
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), map[string]string{
211219
".test": "9c05690c5b8e22df259431c95df33d01267f799de6810382ada1a9ff1b89710e",
212-
}).Times(1)
220+
}).Return("test-bundle-hash-456", []string{}, nil).Times(1)
213221
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
214222
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
215223
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -248,7 +256,7 @@ func Test_Create(t *testing.T) {
248256
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), map[string]string{
249257
"path/to/file1.java": "9c05690c5b8e22df259431c95df33d01267f799de6810382ada1a9ff1b89710e",
250258
"path/with%20spaces/file2.java": "9c05690c5b8e22df259431c95df33d01267f799de6810382ada1a9ff1b89710e",
251-
}).Times(1)
259+
}).Return("test-bundle-hash-789", []string{}, nil).Times(1)
252260
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
253261
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
254262
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -302,7 +310,7 @@ func Test_Create(t *testing.T) {
302310
mockSnykCodeClient.EXPECT().CreateBundle(gomock.Any(), map[string]string{
303311
"path/to/file1.java": "9c05690c5b8e22df259431c95df33d01267f799de6810382ada1a9ff1b89710e",
304312
"path/with%20spaces/file2.java": "9c05690c5b8e22df259431c95df33d01267f799de6810382ada1a9ff1b89710e",
305-
}).Times(1)
313+
}).Return("test-bundle-hash-abc", []string{}, nil).Times(1)
306314
mockInstrumentor := mocks.NewMockInstrumentor(ctrl)
307315
mockInstrumentor.EXPECT().StartSpan(gomock.Any(), gomock.Any()).Return(mockSpan).AnyTimes()
308316
mockInstrumentor.EXPECT().Finish(gomock.Any()).AnyTimes()
@@ -444,7 +452,8 @@ func createTempFileInDir(t *testing.T, name string, size int, temporaryDir strin
444452
t.Helper()
445453

446454
documentURI, fileContent := createFileOfSize(t, name, size, temporaryDir)
447-
return documentURI, deepcode.BundleFile{Hash: util.Hash(fileContent), Content: string(fileContent)}
455+
hash, _ := util.Hash(fileContent)
456+
return documentURI, deepcode.BundleFile{Hash: hash, ContentSize: size}
448457
}
449458

450459
func Test_IsSupported_Extensions(t *testing.T) {

bundle/bundle_test.go

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717
package bundle_test
1818

1919
import (
20+
"bytes"
2021
"context"
2122
"crypto/sha256"
2223
"encoding/hex"
2324
"fmt"
25+
"golang.org/x/net/html/charset"
26+
"io"
2427
"os"
2528
"testing"
2629

@@ -138,7 +141,7 @@ func Test_RawContentBatch(t *testing.T) {
138141
t.Run("create a batch from raw content and upload the bundle", func(t *testing.T) {
139142
ctrl := gomock.NewController(t)
140143
mockSnykCodeClient := deepcodeMocks.NewMockDeepcodeClient(ctrl)
141-
mockSnykCodeClient.EXPECT().ExtendBundle(gomock.Any(), "testBundleHash", bundleFilePartialMatcher{expectedKey: "hello", expectedContent: "world"}, []string{}).Return("newBundleHash", []string{}, nil).Times(1)
144+
mockSnykCodeClient.EXPECT().ExtendBundle(gomock.Any(), "testBundleHash", bundleFilePartialMatcher{expectedKey: "hello", expectedContent: ""}, []string{}).Return("newBundleHash", []string{}, nil).Times(1)
142145

143146
mockSpan := mocks.NewMockSpan(ctrl)
144147
mockSpan.EXPECT().Context().AnyTimes()
@@ -160,21 +163,24 @@ func Test_RawContentBatch(t *testing.T) {
160163
func Test_BundleEncoding(t *testing.T) {
161164
t.Run("utf-8 encoded content", func(t *testing.T) {
162165
content := []byte("hello")
163-
bundle, err := deepcode.BundleFileFrom(content)
166+
bundleFile, err := deepcode.BundleFileFrom(content)
164167
assert.NoError(t, err)
165168

166-
actualShasum := sha256.Sum256([]byte(bundle.Content))
167-
assert.Equal(t, bundle.Hash, hex.EncodeToString(actualShasum[:]))
169+
ExpectedShaSum := sha256.Sum256(content)
170+
assert.Equal(t, hex.EncodeToString(ExpectedShaSum[:]), bundleFile.Hash)
168171
})
169172

170173
t.Run("non utf-8 / binary file", func(t *testing.T) {
171174
content, err := os.ReadFile("testdata/rshell_font.php")
172175
assert.NoError(t, err)
173176

174-
bundle, err := deepcode.BundleFileFrom(content)
177+
bundleFile, err := deepcode.BundleFileFrom(content)
175178
assert.NoError(t, err)
176179

177-
actualShasum := sha256.Sum256([]byte(bundle.Content))
178-
assert.Equal(t, bundle.Hash, hex.EncodeToString(actualShasum[:]))
180+
byteReader := bytes.NewReader(content)
181+
reader, _ := charset.NewReaderLabel("UTF-8", byteReader)
182+
utf8content, _ := io.ReadAll(reader)
183+
ExpectedShaSum := sha256.Sum256(utf8content)
184+
assert.Equal(t, hex.EncodeToString(ExpectedShaSum[:]), bundleFile.Hash)
179185
})
180186
}

bundle/mocks/bundle.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/deepcode/client_pact_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func TestPact_DeepcodeClient(t *testing.T) {
5656

5757
t.Run("Create bundle", func(t *testing.T) {
5858
files := make(map[string]string)
59-
files[path1] = util.Hash([]byte(content))
59+
files[path1], _ = util.Hash([]byte(content))
6060

6161
pact.AddInteraction().Given("New bundle").UponReceiving("Create bundle").WithCompleteRequest(consumer.Request{
6262
Method: "POST",
@@ -112,7 +112,7 @@ func TestPact_DeepcodeClient(t *testing.T) {
112112
test := func(config consumer.MockServerConfig) error {
113113
client := getDeepCodeClient(t, getLocalMockserver(config))
114114
files := make(map[string]string)
115-
files[path1] = util.Hash([]byte(content))
115+
files[path1], _ = util.Hash([]byte(content))
116116
_, _, err := client.CreateBundle(context.Background(), files)
117117

118118
if err != nil {

0 commit comments

Comments
 (0)