Skip to content

Commit 3059282

Browse files
feat: improve bundle performance [IDE-937] (#87)
1 parent cb61999 commit 3059282

File tree

6 files changed

+80
-24
lines changed

6 files changed

+80
-24
lines changed

internal/bundle/bundle.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type Bundle interface {
3030
UploadBatch(ctx context.Context, requestId string, batch *Batch) error
3131
GetBundleHash() string
3232
GetFiles() map[string]deepcode.BundleFile
33+
ClearFiles()
3334
GetMissingFiles() []string
3435
GetLimitToFiles() []string
3536
GetRootPath() string
@@ -81,6 +82,10 @@ func (b *deepCodeBundle) GetFiles() map[string]deepcode.BundleFile {
8182
return b.files
8283
}
8384

85+
// ClearFiles replaces the bundle's file map with a new, empty map.
func (b *deepCodeBundle) ClearFiles() {
86+
b.files = make(map[string]deepcode.BundleFile)
87+
}
88+
8489
// GetMissingFiles returns the bundle's missingFiles slice as stored; no copy is made.
func (b *deepCodeBundle) GetMissingFiles() []string {
8590
return b.missingFiles
8691
}
@@ -136,15 +141,15 @@ func NewBatch(documents map[string]deepcode.BundleFile) *Batch {
136141

137142
// canFitFile reports whether the payload for the given document, added to the
// value returned by b.getSize(), stays strictly below maxUploadBatchSize.
// As of this change the caller passes the content size in bytes instead of the
// content itself.
// todo simplify the size computation
138143
// maybe consider an addFile / canFitFile interface with proper error handling
139-
func (b *Batch) canFitFile(uri string, content []byte) bool {
140-
docPayloadSize := b.getTotalDocPayloadSize(uri, content)
144+
func (b *Batch) canFitFile(uri string, contentSize int) bool {
145+
docPayloadSize := b.getTotalDocPayloadSize(uri, contentSize)
141146
newSize := docPayloadSize + b.getSize()
142147
// NOTE(review): b.size is increased unconditionally, i.e. also when the
// function goes on to return false - confirm this is intended.
b.size += docPayloadSize
143148
return newSize < maxUploadBatchSize
144149
}
145150

146-
// getTotalDocPayloadSize returns the size accounted for one document: the
// lengths of jsonHashSizePerFile and jsonOverheadPerFile, the byte length of
// documentURI, and the size of the document content.
// The removed version took the content and used len(content); the added
// version takes the size directly.
func (b *Batch) getTotalDocPayloadSize(documentURI string, content []byte) int {
147-
return len(jsonHashSizePerFile) + len(jsonOverheadPerFile) + len([]byte(documentURI)) + len(content)
151+
func (b *Batch) getTotalDocPayloadSize(documentURI string, contentSize int) int {
152+
return len(jsonHashSizePerFile) + len(jsonOverheadPerFile) + len([]byte(documentURI)) + contentSize
148153
}
149154

150155
func (b *Batch) getSize() int {

internal/bundle/bundle_manager.go

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,25 @@ func (b *bundleManager) Create(ctx context.Context,
105105
if !supported {
106106
continue
107107
}
108-
var fileContent []byte
109-
fileContent, err = os.ReadFile(absoluteFilePath)
110-
if err != nil {
111-
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("could not load content of file")
108+
109+
fileInfo, fileErr := os.Stat(absoluteFilePath)
110+
if fileErr != nil {
111+
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("Failed to read file info")
112112
continue
113113
}
114114

115-
if !(len(fileContent) > 0 && len(fileContent) <= maxFileSize) {
115+
if fileInfo.Size() == 0 || fileInfo.Size() > maxFileSize {
116116
continue
117117
}
118118

119-
var relativePath string
120-
relativePath, err = util.ToRelativeUnixPath(rootPath, absoluteFilePath)
121-
if err != nil {
119+
fileContent, fileErr := os.ReadFile(absoluteFilePath)
120+
if fileErr != nil {
121+
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("Failed to load content of file")
122+
continue
123+
}
124+
125+
relativePath, fileErr := util.ToRelativeUnixPath(rootPath, absoluteFilePath)
126+
if fileErr != nil {
122127
b.errorReporter.CaptureError(err, observability.ErrorReporterOptions{ErrorDiagnosticPath: rootPath})
123128
}
124129
relativePath = util.EncodePath(relativePath)
@@ -181,16 +186,44 @@ func (b *bundleManager) Upload(
181186
if err := ctx.Err(); err != nil {
182187
return bundle, err
183188
}
189+
b.enrichBatchWithFileContent(batch, bundle.GetRootPath())
184190
err := bundle.UploadBatch(s.Context(), requestId, batch)
185191
if err != nil {
186192
return bundle, err
187193
}
194+
batch.documents = make(map[string]deepcode.BundleFile)
188195
}
189196
}
190197

198+
// bundle doesn't need file map anymore since they are already grouped and uploaded
199+
bundle.ClearFiles()
191200
return bundle, nil
192201
}
193202

203+
// enrichBatchWithFileContent fills in the Content field of every document in
// batch by reading the corresponding file from disk.
//
// For each key in batch.documents the path is made absolute against rootPath
// and decoded, the file is read, and its bytes are passed through
// util.ConvertToUTF8 before being stored as the document's Content.
//
// The function returns nothing: if decoding, reading or converting fails for a
// file, the error is logged and that document is left as it was.
func (b *bundleManager) enrichBatchWithFileContent(batch *Batch, rootPath string) {
204+
for filePath, bundleFile := range batch.documents {
205+
absPath, err := util.DecodePath(util.ToAbsolutePath(rootPath, filePath))
206+
if err != nil {
207+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to decode Path")
208+
continue
209+
}
210+
content, err := os.ReadFile(absPath)
211+
if err != nil {
212+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to read bundle file")
213+
continue
214+
}
215+
216+
utf8Content, err := util.ConvertToUTF8(content)
217+
if err != nil {
218+
b.logger.Error().Err(err).Str("file", filePath).Msg("Failed to convert bundle file to UTF-8")
219+
continue
220+
}
221+
222+
// bundleFile is a copy of the map value, so the updated struct has to be
// written back under the same key.
bundleFile.Content = string(utf8Content)
223+
batch.documents[filePath] = bundleFile
224+
}
225+
}
226+
194227
func (b *bundleManager) groupInBatches(
195228
ctx context.Context,
196229
bundle Bundle,
@@ -212,12 +245,11 @@ func (b *bundleManager) groupInBatches(
212245
}
213246

214247
file := files[filePath]
215-
var fileContent = []byte(file.Content)
216-
if batch.canFitFile(filePath, fileContent) {
217-
b.logger.Trace().Str("path", filePath).Int("size", len(fileContent)).Msgf("added to deepCodeBundle #%v", len(batches))
248+
if batch.canFitFile(filePath, file.ContentSize) {
249+
b.logger.Trace().Str("path", filePath).Int("size", file.ContentSize).Msgf("added to deepCodeBundle #%v", len(batches))
218250
batch.documents[filePath] = file
219251
} else {
220-
b.logger.Trace().Str("path", filePath).Int("size", len(fileContent)).Msgf("created new deepCodeBundle - %v bundles in this upload so far", len(batches))
252+
b.logger.Trace().Str("path", filePath).Int("size", file.ContentSize).Msgf("created new deepCodeBundle - %v bundles in this upload so far", len(batches))
221253
newUploadBatch := NewBatch(map[string]deepcode.BundleFile{})
222254
newUploadBatch.documents[filePath] = file
223255
batches = append(batches, newUploadBatch)

internal/bundle/bundle_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ func createTempFileInDir(t *testing.T, name string, size int, temporaryDir strin
444444
t.Helper()
445445

446446
documentURI, fileContent := createFileOfSize(t, name, size, temporaryDir)
447-
return documentURI, deepcode.BundleFile{Hash: util.Hash(fileContent), Content: string(fileContent)}
447+
return documentURI, deepcode.BundleFile{Hash: util.Hash(fileContent), ContentSize: size}
448448
}
449449

450450
func Test_IsSupported_Extensions(t *testing.T) {

internal/bundle/mocks/bundle.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/deepcode/helpers.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ import (
2020
)
2121

2222
// BundleFile describes one file of a bundle. It serializes to JSON with the
// keys "hash", "content" and (added by this change) "size".
type BundleFile struct {
23-
Hash string `json:"hash"`
24-
Content string `json:"content"`
23+
Hash string `json:"hash"`
24+
Content string `json:"content"`
25+
// ContentSize is the content length in bytes; BundleFileFrom sets it to len(content).
ContentSize int `json:"size"`
2526
}
2627

2728
// BundleFileFrom builds a BundleFile for the given raw file content.
// Hash is util.Hash(content). In the added version Content is left empty and
// ContentSize records len(content); the removed version stored the content
// itself as a string.
func BundleFileFrom(content []byte) BundleFile {
2829
file := BundleFile{
29-
Hash: util.Hash(content),
30-
Content: string(content),
30+
Hash: util.Hash(content),
31+
Content: "", // We create the bundleFile empty, and enrich with content later.
32+
ContentSize: len(content),
3133
}
3234
return file
3335
}

internal/util/hash.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,18 @@ import (
2626
)
2727

2828
// Hash returns the hex-encoded SHA-256 digest of content after it has been
// run through the UTF-8 conversion. If the conversion returns an error, the
// digest is computed over the unconverted content instead.
// The removed lines performed the conversion inline; the added line delegates
// to ConvertToUTF8.
func Hash(content []byte) string {
29-
byteReader := bytes.NewReader(content)
30-
reader, _ := charset.NewReaderLabel("UTF-8", byteReader)
31-
utf8content, err := io.ReadAll(reader)
29+
utf8content, err := ConvertToUTF8(content)
3230
if err != nil {
3331
// Fall back to hashing the raw bytes when conversion fails.
utf8content = content
3432
}
3533
b := sha256.Sum256(utf8content)
3634
sum256 := hex.EncodeToString(b[:])
3735
return sum256
3836
}
37+
38+
// ConvertToUTF8 reads content through a reader obtained from
// charset.NewReaderLabel with the label "UTF-8" and returns all bytes read,
// together with any error from io.ReadAll.
func ConvertToUTF8(content []byte) ([]byte, error) {
39+
byteReader := bytes.NewReader(content)
40+
// NOTE(review): the error returned by NewReaderLabel is discarded here;
// presumably it cannot fail for the fixed "UTF-8" label - confirm.
reader, _ := charset.NewReaderLabel("UTF-8", byteReader)
41+
utf8content, err := io.ReadAll(reader)
42+
return utf8content, err
43+
}

0 commit comments

Comments
 (0)