Skip to content

Commit 7ad20a2

Browse files
oliverpoolEarl Warren
authored andcommitted
git/blob: GetContentBase64 with fewer allocations and no goroutine (#8297)
See #8222 for context.i `GetBlobContentBase64` was using a pipe and a goroutine to read the blob content as base64. This can be replace by a pre-allocated buffer and a direct copy. Note that although similar to `GetBlobContent`, it does not truncate the content if the blob size is over the limit (but returns an error). I think that `GetBlobContent` should adopt the same behavior at some point (error instead of truncating). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8297 Reviewed-by: Earl Warren <[email protected]> Co-authored-by: oliverpool <[email protected]> Co-committed-by: oliverpool <[email protected]>
1 parent 184e068 commit 7ad20a2

File tree

4 files changed

+56
-30
lines changed

4 files changed

+56
-30
lines changed

modules/git/blob.go

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"bufio"
99
"bytes"
1010
"encoding/base64"
11+
"fmt"
1112
"io"
1213

1314
"forgejo.org/modules/log"
@@ -172,33 +173,43 @@ func (b *Blob) GetBlobContent(limit int64) (string, error) {
172173
return string(buf), err
173174
}
174175

175-
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string
176-
func (b *Blob) GetBlobContentBase64() (string, error) {
177-
dataRc, err := b.DataAsync()
176+
type BlobTooLargeError struct {
177+
Size, Limit int64
178+
}
179+
180+
func (b BlobTooLargeError) Error() string {
181+
return fmt.Sprintf("blob: content larger than limit (%d > %d)", b.Size, b.Limit)
182+
}
183+
184+
// GetContentBase64 Reads the content of the blob and returns it as base64 encoded string.
185+
// Returns [BlobTooLargeError] if the (unencoded) content is larger than the limit.
186+
func (b *Blob) GetContentBase64(limit int64) (string, error) {
187+
if b.Size() > limit {
188+
return "", BlobTooLargeError{
189+
Size: b.Size(),
190+
Limit: limit,
191+
}
192+
}
193+
194+
rc, size, err := b.NewTruncatedReader(limit)
178195
if err != nil {
179196
return "", err
180197
}
181-
defer dataRc.Close()
182-
183-
pr, pw := io.Pipe()
184-
encoder := base64.NewEncoder(base64.StdEncoding, pw)
198+
defer rc.Close()
185199

186-
go func() {
187-
_, err := io.Copy(encoder, dataRc)
188-
_ = encoder.Close()
200+
encoding := base64.StdEncoding
201+
buf := bytes.NewBuffer(make([]byte, 0, encoding.EncodedLen(int(size))))
189202

190-
if err != nil {
191-
_ = pw.CloseWithError(err)
192-
} else {
193-
_ = pw.Close()
194-
}
195-
}()
203+
encoder := base64.NewEncoder(encoding, buf)
196204

197-
out, err := io.ReadAll(pr)
198-
if err != nil {
205+
if _, err := io.Copy(encoder, rc); err != nil {
199206
return "", err
200207
}
201-
return string(out), nil
208+
if err := encoder.Close(); err != nil {
209+
return "", err
210+
}
211+
212+
return buf.String(), nil
202213
}
203214

204215
// GuessContentType guesses the content type of the blob.

modules/git/blob_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,24 @@ func TestBlob(t *testing.T) {
6363
require.Equal(t, "file2\n", r)
6464
})
6565

66+
t.Run("GetContentBase64", func(t *testing.T) {
67+
r, err := testBlob.GetContentBase64(100)
68+
require.NoError(t, err)
69+
require.Equal(t, "ZmlsZTIK", r)
70+
71+
r, err = testBlob.GetContentBase64(-1)
72+
require.ErrorAs(t, err, &BlobTooLargeError{})
73+
require.Empty(t, r)
74+
75+
r, err = testBlob.GetContentBase64(4)
76+
require.ErrorAs(t, err, &BlobTooLargeError{})
77+
require.Empty(t, r)
78+
79+
r, err = testBlob.GetContentBase64(6)
80+
require.NoError(t, err)
81+
require.Equal(t, "ZmlsZTIK", r)
82+
})
83+
6684
t.Run("NewTruncatedReader", func(t *testing.T) {
6785
// read fewer than available
6886
rc, size, err := testBlob.NewTruncatedReader(100)

routers/api/v1/repo/wiki.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package repo
55

66
import (
77
"encoding/base64"
8+
"errors"
89
"fmt"
910
"net/http"
1011
"net/url"
@@ -506,11 +507,8 @@ func findWikiRepoCommit(ctx *context.APIContext) (*git.Repository, *git.Commit)
506507
// given tree entry, encoded with base64. Writes to ctx if an error occurs.
507508
func wikiContentsByEntry(ctx *context.APIContext, entry *git.TreeEntry) string {
508509
blob := entry.Blob()
509-
if blob.Size() > setting.API.DefaultMaxBlobSize {
510-
return ""
511-
}
512-
content, err := blob.GetBlobContentBase64()
513-
if err != nil {
510+
content, err := blob.GetContentBase64(setting.API.DefaultMaxBlobSize)
511+
if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
514512
ctx.Error(http.StatusInternalServerError, "GetBlobContentBase64", err)
515513
return ""
516514
}

services/repository/files/content.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package files
55

66
import (
77
"context"
8+
"errors"
89
"fmt"
910
"net/url"
1011
"path"
@@ -273,13 +274,11 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
273274
if err != nil {
274275
return nil, err
275276
}
276-
content := ""
277-
if gitBlob.Size() <= setting.API.DefaultMaxBlobSize {
278-
content, err = gitBlob.GetBlobContentBase64()
279-
if err != nil {
280-
return nil, err
281-
}
277+
content, err := gitBlob.GetContentBase64(setting.API.DefaultMaxBlobSize)
278+
if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
279+
return nil, err
282280
}
281+
283282
return &api.GitBlob{
284283
SHA: gitBlob.ID.String(),
285284
URL: repo.APIURL() + "/git/blobs/" + url.PathEscape(gitBlob.ID.String()),

0 commit comments

Comments
 (0)