Skip to content

Commit 9367609

Browse files
fix: resolve hash mismatches when uploading files (#96)
Co-authored-by: Peter Schäfer <[email protected]>
1 parent 8187e9c commit 9367609

File tree

6 files changed

+66
-20
lines changed

6 files changed

+66
-20
lines changed

bundle/bundle_manager.go

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,11 @@ package bundle
1818

1919
import (
2020
"context"
21-
"github.com/snyk/code-client-go/scan"
2221
"os"
2322
"path/filepath"
2423

24+
"github.com/snyk/code-client-go/scan"
25+
2526
"github.com/puzpuzpuz/xsync"
2627
"github.com/rs/zerolog"
2728

@@ -105,14 +106,20 @@ func (b *bundleManager) Create(ctx context.Context,
105106
if !supported {
106107
continue
107108
}
108-
var fileContent []byte
109-
fileContent, err = os.ReadFile(absoluteFilePath)
109+
var rawContent []byte
110+
rawContent, err = os.ReadFile(absoluteFilePath)
110111
if err != nil {
111112
b.logger.Error().Err(err).Str("filePath", absoluteFilePath).Msg("could not load content of file")
112113
continue
113114
}
114115

115-
if !(len(fileContent) > 0 && len(fileContent) <= maxFileSize) {
116+
bundleFile, bundleError := deepcode.BundleFileFrom(rawContent)
117+
if bundleError != nil {
118+
b.logger.Error().Err(bundleError).Str("filePath", absoluteFilePath).Msg("could not convert content of file to UTF-8")
119+
continue
120+
}
121+
122+
if !(len(bundleFile.Content) > 0 && len(bundleFile.Content) <= maxFileSize) {
116123
continue
117124
}
118125

@@ -123,7 +130,6 @@ func (b *bundleManager) Create(ctx context.Context,
123130
}
124131
relativePath = util.EncodePath(relativePath)
125132

126-
bundleFile := deepcode.BundleFileFrom(fileContent)
127133
bundleFiles[relativePath] = bundleFile
128134
fileHashes[relativePath] = bundleFile.Hash
129135
b.logger.Trace().Str("method", "BundleFileFrom").Str("hash", bundleFile.Hash).Str("filePath", absoluteFilePath).Msg("")

bundle/bundle_test.go

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@ package bundle_test
1818

1919
import (
2020
"context"
21+
"crypto/sha256"
22+
"encoding/hex"
23+
"os"
2124
"testing"
2225

2326
"github.com/golang/mock/gomock"
@@ -104,3 +107,25 @@ func Test_UploadBatch(t *testing.T) {
104107
assert.NotEqual(t, oldHash, newHash)
105108
})
106109
}
110+
111+
func Test_BundleEncoding(t *testing.T) {
112+
t.Run("utf-8 encoded content", func(t *testing.T) {
113+
content := []byte("hello")
114+
bundle, err := deepcode.BundleFileFrom(content)
115+
assert.NoError(t, err)
116+
117+
actualShasum := sha256.Sum256([]byte(bundle.Content))
118+
assert.Equal(t, bundle.Hash, hex.EncodeToString(actualShasum[:]))
119+
})
120+
121+
t.Run("non utf-8 / binary file", func(t *testing.T) {
122+
content, err := os.ReadFile("testdata/rshell_font.php")
123+
assert.NoError(t, err)
124+
125+
bundle, err := deepcode.BundleFileFrom(content)
126+
assert.NoError(t, err)
127+
128+
actualShasum := sha256.Sum256([]byte(bundle.Content))
129+
assert.Equal(t, bundle.Hash, hex.EncodeToString(actualShasum[:]))
130+
})
131+
}

bundle/testdata/rshell_font.php

2.86 KB
Binary file not shown.

internal/deepcode/helpers.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
package deepcode
1717

1818
import (
19+
"bytes"
20+
1921
"github.com/snyk/code-client-go/internal/util"
2022
)
2123

@@ -24,10 +26,15 @@ type BundleFile struct {
2426
Content string `json:"content"`
2527
}
2628

27-
func BundleFileFrom(content []byte) BundleFile {
29+
func BundleFileFrom(rawContent []byte) (BundleFile, error) {
30+
fileContent, err := util.ConvertToUTF8(bytes.NewReader(rawContent))
31+
if err != nil {
32+
return BundleFile{}, err
33+
}
34+
2835
file := BundleFile{
29-
Hash: util.Hash(content),
30-
Content: string(content),
36+
Hash: util.Hash(fileContent),
37+
Content: string(fileContent),
3138
}
32-
return file
39+
return file, nil
3340
}

internal/util/hash.go

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
package util
1818

1919
import (
20-
"bytes"
2120
"crypto/sha256"
2221
"encoding/hex"
2322
"io"
@@ -26,13 +25,16 @@ import (
2625
)
2726

2827
func Hash(content []byte) string {
29-
byteReader := bytes.NewReader(content)
30-
reader, _ := charset.NewReaderLabel("UTF-8", byteReader)
31-
utf8content, err := io.ReadAll(reader)
32-
if err != nil {
33-
utf8content = content
34-
}
35-
b := sha256.Sum256(utf8content)
28+
b := sha256.Sum256(content)
3629
sum256 := hex.EncodeToString(b[:])
3730
return sum256
3831
}
32+
33+
func ConvertToUTF8(reader io.Reader) ([]byte, error) {
34+
utf8Reader, err := charset.NewReaderLabel("UTF-8", reader)
35+
if err != nil {
36+
return nil, err
37+
}
38+
utf8content, err := io.ReadAll(utf8Reader)
39+
return utf8content, err
40+
}

scan_test.go

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,12 @@ package codeclient_test
1717

1818
import (
1919
"context"
20-
"github.com/google/uuid"
2120
"os"
2221
"path/filepath"
2322
"testing"
2423

24+
"github.com/google/uuid"
25+
2526
"github.com/golang/mock/gomock"
2627
"github.com/rs/zerolog"
2728
"github.com/stretchr/testify/assert"
@@ -44,9 +45,14 @@ import (
4445
func Test_UploadAndAnalyze(t *testing.T) {
4546
baseDir, firstDocPath, secondDocPath, firstDocContent, secondDocContent := setupDocs(t)
4647
docs := sliceToChannel([]string{firstDocPath, secondDocPath})
48+
firstBundle, err := deepcode.BundleFileFrom(firstDocContent)
49+
assert.NoError(t, err)
50+
secondBundle, err := deepcode.BundleFileFrom(secondDocContent)
51+
assert.NoError(t, err)
52+
4753
files := map[string]deepcode.BundleFile{
48-
firstDocPath: deepcode.BundleFileFrom(firstDocContent),
49-
firstDocPath: deepcode.BundleFileFrom(secondDocContent),
54+
firstDocPath: firstBundle,
55+
firstDocPath: secondBundle,
5056
}
5157

5258
logger := zerolog.Nop()

0 commit comments

Comments
 (0)