Skip to content

Commit 9a0ec53

Browse files
ChristopherHX, silverwind, wxiaoguang
authored
Stream repo zip/tar.gz/bundle archives by default (#35487)
Initial implementation of the linked proposal.

* Closes #29942
* Fix #34003
* Fix #30443

---------

Co-authored-by: silverwind <[email protected]>
Co-authored-by: wxiaoguang <[email protected]>
1 parent 90cb5f9 commit 9a0ec53

File tree

6 files changed

+110
-156
lines changed

6 files changed

+110
-156
lines changed

modules/setting/repository.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ var (
5454
AllowForkWithoutMaximumLimit bool
5555
AllowForkIntoSameOwner bool
5656

57+
// StreamArchives makes Gitea stream git archive files to the client directly instead of creating an archive first.
58+
// Ideally all users should use this streaming method. However, at the moment we don't know whether there are
59+
// any users who still need the old behavior, so we introduce this option, intentionally not documenting it.
60+
// After one or two releases, if no one complains, we will remove this option and always use streaming.
61+
StreamArchives bool
62+
5763
// Repository editor settings
5864
Editor struct {
5965
LineWrapExtensions []string
@@ -167,6 +173,7 @@ var (
167173
DisableStars: false,
168174
DefaultBranch: "main",
169175
AllowForkWithoutMaximumLimit: true,
176+
StreamArchives: true,
170177

171178
// Repository editor settings
172179
Editor: struct {

routers/api/v1/api.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,7 +1247,7 @@ func Routes() *web.Router {
12471247
}, reqToken())
12481248
m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile)
12491249
m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS)
1250-
m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive)
1250+
m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.GetArchive)
12511251
m.Combo("/forks").Get(repo.ListForks).
12521252
Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork)
12531253
m.Post("/merge-upstream", reqToken(), mustNotBeArchived, reqRepoWriter(unit.TypeCode), bind(api.MergeUpstreamRequest{}), repo.MergeUpstream)
@@ -1466,7 +1466,7 @@ func Routes() *web.Router {
14661466
m.Delete("", repo.DeleteAvatar)
14671467
}, reqAdmin(), reqToken())
14681468

1469-
m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), repo.DownloadArchive)
1469+
m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.DownloadArchive)
14701470
}, repoAssignment(), checkTokenPublicOnly())
14711471
}, tokenRequiresScopes(auth_model.AccessTokenScopeCategoryRepository))
14721472

routers/api/v1/repo/download.go

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,29 @@
44
package repo
55

66
import (
7+
"errors"
78
"net/http"
89

910
"code.gitea.io/gitea/modules/git"
10-
"code.gitea.io/gitea/modules/gitrepo"
1111
"code.gitea.io/gitea/services/context"
1212
archiver_service "code.gitea.io/gitea/services/repository/archiver"
1313
)
1414

15+
func serveRepoArchive(ctx *context.APIContext, reqFileName string) {
16+
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, reqFileName)
17+
if err != nil {
18+
if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
19+
ctx.APIError(http.StatusBadRequest, err)
20+
} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
21+
ctx.APIError(http.StatusNotFound, err)
22+
} else {
23+
ctx.APIErrorInternal(err)
24+
}
25+
return
26+
}
27+
archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq)
28+
}
29+
1530
func DownloadArchive(ctx *context.APIContext) {
1631
var tp git.ArchiveType
1732
switch ballType := ctx.PathParam("ball_type"); ballType {
@@ -25,27 +40,5 @@ func DownloadArchive(ctx *context.APIContext) {
2540
ctx.APIError(http.StatusBadRequest, "Unknown archive type: "+ballType)
2641
return
2742
}
28-
29-
if ctx.Repo.GitRepo == nil {
30-
var err error
31-
ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository)
32-
if err != nil {
33-
ctx.APIErrorInternal(err)
34-
return
35-
}
36-
}
37-
38-
r, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*")+"."+tp.String())
39-
if err != nil {
40-
ctx.APIErrorInternal(err)
41-
return
42-
}
43-
44-
archive, err := r.Await(ctx)
45-
if err != nil {
46-
ctx.APIErrorInternal(err)
47-
return
48-
}
49-
50-
download(ctx, r.GetArchiveName(), archive)
43+
serveRepoArchive(ctx, ctx.PathParam("*")+"."+tp.String())
5144
}

routers/api/v1/repo/file.go

Lines changed: 1 addition & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ import (
1515
"time"
1616

1717
git_model "code.gitea.io/gitea/models/git"
18-
repo_model "code.gitea.io/gitea/models/repo"
1918
"code.gitea.io/gitea/modules/git"
20-
"code.gitea.io/gitea/modules/gitrepo"
2119
"code.gitea.io/gitea/modules/httpcache"
2220
"code.gitea.io/gitea/modules/json"
2321
"code.gitea.io/gitea/modules/lfs"
@@ -31,7 +29,6 @@ import (
3129
"code.gitea.io/gitea/routers/common"
3230
"code.gitea.io/gitea/services/context"
3331
pull_service "code.gitea.io/gitea/services/pull"
34-
archiver_service "code.gitea.io/gitea/services/repository/archiver"
3532
files_service "code.gitea.io/gitea/services/repository/files"
3633
)
3734

@@ -282,74 +279,7 @@ func GetArchive(ctx *context.APIContext) {
282279
// "404":
283280
// "$ref": "#/responses/notFound"
284281

285-
if ctx.Repo.GitRepo == nil {
286-
var err error
287-
ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository)
288-
if err != nil {
289-
ctx.APIErrorInternal(err)
290-
return
291-
}
292-
}
293-
294-
archiveDownload(ctx)
295-
}
296-
297-
func archiveDownload(ctx *context.APIContext) {
298-
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*"))
299-
if err != nil {
300-
if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
301-
ctx.APIError(http.StatusBadRequest, err)
302-
} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
303-
ctx.APIError(http.StatusNotFound, err)
304-
} else {
305-
ctx.APIErrorInternal(err)
306-
}
307-
return
308-
}
309-
310-
archiver, err := aReq.Await(ctx)
311-
if err != nil {
312-
ctx.APIErrorInternal(err)
313-
return
314-
}
315-
316-
download(ctx, aReq.GetArchiveName(), archiver)
317-
}
318-
319-
func download(ctx *context.APIContext, archiveName string, archiver *repo_model.RepoArchiver) {
320-
downloadName := ctx.Repo.Repository.Name + "-" + archiveName
321-
322-
// Add nix format link header so tarballs lock correctly:
323-
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
324-
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
325-
ctx.Repo.Repository.APIURL(),
326-
archiver.CommitID,
327-
archiver.Type.String(),
328-
archiver.CommitID,
329-
))
330-
331-
rPath := archiver.RelativePath()
332-
if setting.RepoArchive.Storage.ServeDirect() {
333-
// If we have a signed url (S3, object storage), redirect to this directly.
334-
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
335-
if u != nil && err == nil {
336-
ctx.Redirect(u.String())
337-
return
338-
}
339-
}
340-
341-
// If we have matched and access to release or issue
342-
fr, err := storage.RepoArchives.Open(rPath)
343-
if err != nil {
344-
ctx.APIErrorInternal(err)
345-
return
346-
}
347-
defer fr.Close()
348-
349-
ctx.ServeContent(fr, &context.ServeHeaderOptions{
350-
Filename: downloadName,
351-
LastModified: archiver.CreatedUnix.AsLocalTime(),
352-
})
282+
serveRepoArchive(ctx, ctx.PathParam("*"))
353283
}
354284

355285
// GetEditorconfig get editor config of a repository

routers/web/repo/repo.go

Lines changed: 7 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"code.gitea.io/gitea/modules/optional"
2525
repo_module "code.gitea.io/gitea/modules/repository"
2626
"code.gitea.io/gitea/modules/setting"
27-
"code.gitea.io/gitea/modules/storage"
2827
api "code.gitea.io/gitea/modules/structs"
2928
"code.gitea.io/gitea/modules/templates"
3029
"code.gitea.io/gitea/modules/util"
@@ -376,53 +375,19 @@ func Download(ctx *context.Context) {
376375
}
377376
return
378377
}
379-
380-
archiver, err := aReq.Await(ctx)
381-
if err != nil {
382-
ctx.ServerError("archiver.Await", err)
383-
return
384-
}
385-
386-
download(ctx, aReq.GetArchiveName(), archiver)
387-
}
388-
389-
func download(ctx *context.Context, archiveName string, archiver *repo_model.RepoArchiver) {
390-
downloadName := ctx.Repo.Repository.Name + "-" + archiveName
391-
392-
// Add nix format link header so tarballs lock correctly:
393-
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
394-
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.tar.gz?rev=%s>; rel="immutable"`,
395-
ctx.Repo.Repository.APIURL(),
396-
archiver.CommitID, archiver.CommitID))
397-
398-
rPath := archiver.RelativePath()
399-
if setting.RepoArchive.Storage.ServeDirect() {
400-
// If we have a signed url (S3, object storage), redirect to this directly.
401-
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
402-
if u != nil && err == nil {
403-
ctx.Redirect(u.String())
404-
return
405-
}
406-
}
407-
408-
// If we have matched and access to release or issue
409-
fr, err := storage.RepoArchives.Open(rPath)
410-
if err != nil {
411-
ctx.ServerError("Open", err)
412-
return
413-
}
414-
defer fr.Close()
415-
416-
ctx.ServeContent(fr, &context.ServeHeaderOptions{
417-
Filename: downloadName,
418-
LastModified: archiver.CreatedUnix.AsLocalTime(),
419-
})
378+
archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq)
420379
}
421380

422381
// InitiateDownload will enqueue an archival request, as needed. It may submit
423382
// a request that's already in-progress, but the archiver service will just
424383
// kind of drop it on the floor if this is the case.
425384
func InitiateDownload(ctx *context.Context) {
385+
if setting.Repository.StreamArchives {
386+
ctx.JSON(http.StatusOK, map[string]any{
387+
"complete": true,
388+
})
389+
return
390+
}
426391
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*"))
427392
if err != nil {
428393
ctx.HTTPError(http.StatusBadRequest, "invalid archive request")

services/repository/archiver/archiver.go

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"errors"
99
"fmt"
1010
"io"
11+
"net/http"
1112
"os"
1213
"strings"
1314
"time"
@@ -17,11 +18,13 @@ import (
1718
"code.gitea.io/gitea/modules/git"
1819
"code.gitea.io/gitea/modules/gitrepo"
1920
"code.gitea.io/gitea/modules/graceful"
21+
"code.gitea.io/gitea/modules/httplib"
2022
"code.gitea.io/gitea/modules/log"
2123
"code.gitea.io/gitea/modules/process"
2224
"code.gitea.io/gitea/modules/queue"
2325
"code.gitea.io/gitea/modules/setting"
2426
"code.gitea.io/gitea/modules/storage"
27+
gitea_context "code.gitea.io/gitea/services/context"
2528
)
2629

2730
// ArchiveRequest defines the parameters of an archive request, which notably
@@ -138,6 +141,25 @@ func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver
138141
}
139142
}
140143

144+
// Stream satisfies the ArchiveRequest being passed in. Processing
145+
// will occur directly in this routine.
146+
func (aReq *ArchiveRequest) Stream(ctx context.Context, gitRepo *git.Repository, w io.Writer) error {
147+
if aReq.Type == git.ArchiveBundle {
148+
return gitRepo.CreateBundle(
149+
ctx,
150+
aReq.CommitID,
151+
w,
152+
)
153+
}
154+
return gitRepo.CreateArchive(
155+
ctx,
156+
aReq.Type,
157+
w,
158+
setting.Repository.PrefixArchiveFiles,
159+
aReq.CommitID,
160+
)
161+
}
162+
141163
// doArchive satisfies the ArchiveRequest being passed in. Processing
142164
// will occur in a separate goroutine, as this phase may take a while to
143165
// complete. If the archive already exists, doArchive will not do
@@ -204,31 +226,17 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver
204226
}
205227
defer gitRepo.Close()
206228

207-
go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) {
229+
go func(done chan error, w *io.PipeWriter, archiveReq *ArchiveRequest, gitRepo *git.Repository) {
208230
defer func() {
209231
if r := recover(); r != nil {
210232
done <- fmt.Errorf("%v", r)
211233
}
212234
}()
213235

214-
if archiver.Type == git.ArchiveBundle {
215-
err = gitRepo.CreateBundle(
216-
ctx,
217-
archiver.CommitID,
218-
w,
219-
)
220-
} else {
221-
err = gitRepo.CreateArchive(
222-
ctx,
223-
archiver.Type,
224-
w,
225-
setting.Repository.PrefixArchiveFiles,
226-
archiver.CommitID,
227-
)
228-
}
236+
err := archiveReq.Stream(ctx, gitRepo, w)
229237
_ = w.CloseWithError(err)
230238
done <- err
231-
}(done, w, archiver, gitRepo)
239+
}(done, w, r, gitRepo)
232240

233241
// TODO: add lfs data to zip
234242
// TODO: add submodule data to zip
@@ -338,3 +346,54 @@ func DeleteRepositoryArchives(ctx context.Context) error {
338346
}
339347
return storage.Clean(storage.RepoArchives)
340348
}
349+
350+
func ServeRepoArchive(ctx *gitea_context.Base, repo *repo_model.Repository, gitRepo *git.Repository, archiveReq *ArchiveRequest) {
351+
// Add nix format link header so tarballs lock correctly:
352+
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
353+
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
354+
repo.APIURL(),
355+
archiveReq.CommitID,
356+
archiveReq.Type.String(),
357+
archiveReq.CommitID,
358+
))
359+
downloadName := repo.Name + "-" + archiveReq.GetArchiveName()
360+
361+
if setting.Repository.StreamArchives {
362+
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{Filename: downloadName})
363+
if err := archiveReq.Stream(ctx, gitRepo, ctx.Resp); err != nil && !ctx.Written() {
364+
log.Error("Archive %v streaming failed: %v", archiveReq, err)
365+
ctx.HTTPError(http.StatusInternalServerError)
366+
}
367+
return
368+
}
369+
370+
archiver, err := archiveReq.Await(ctx)
371+
if err != nil {
372+
log.Error("Archive %v await failed: %v", archiveReq, err)
373+
ctx.HTTPError(http.StatusInternalServerError)
374+
return
375+
}
376+
377+
rPath := archiver.RelativePath()
378+
if setting.RepoArchive.Storage.ServeDirect() {
379+
// If we have a signed url (S3, object storage), redirect to this directly.
380+
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
381+
if u != nil && err == nil {
382+
ctx.Redirect(u.String())
383+
return
384+
}
385+
}
386+
387+
fr, err := storage.RepoArchives.Open(rPath)
388+
if err != nil {
389+
log.Error("Archive %v open file failed: %v", archiveReq, err)
390+
ctx.HTTPError(http.StatusInternalServerError)
391+
return
392+
}
393+
defer fr.Close()
394+
395+
ctx.ServeContent(fr, &gitea_context.ServeHeaderOptions{
396+
Filename: downloadName,
397+
LastModified: archiver.CreatedUnix.AsLocalTime(),
398+
})
399+
}

0 commit comments

Comments
 (0)