Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2abef73
Refactor CatFile batch implementation and introduce batch-command for…
lunny Jun 8, 2025
bb8f1cf
improvements
lunny Jun 8, 2025
db4813a
Fix command arg
lunny Jun 8, 2025
334eb9e
Fix linkt error
lunny Jun 8, 2025
2454ab6
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Jun 18, 2025
af97cab
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 2, 2025
646c509
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 5, 2025
c7b3bf3
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 29, 2025
d364bd6
Merge branch 'lunny/catfile_batch_refactor' of github.com:lunny/gitea…
lunny Sep 29, 2025
2166634
some improvements
lunny Sep 29, 2025
3242f31
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 9, 2025
307a6c7
Delay to create the batch go routine
lunny Oct 9, 2025
824e04b
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 18, 2025
825c48d
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 18, 2025
efc4d43
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 20, 2025
db4b7e4
Add test for BatchCommand
lunny Oct 22, 2025
b39e2f2
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 22, 2025
a8e8890
Merge branch 'lunny/catfile_batch_refactor' of github.com:lunny/gitea…
lunny Oct 22, 2025
2f39de1
adjust batch argument
lunny Oct 22, 2025
87e12a1
Remove unnecessary added test repo
lunny Oct 22, 2025
ea16a3e
Fix lint and test
lunny Oct 22, 2025
a885f07
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 25, 2025
4c6d91a
refactor
wxiaoguang Oct 26, 2025
872918f
refactor
wxiaoguang Oct 26, 2025
5dde21a
refactor
wxiaoguang Oct 26, 2025
832f5e4
refactor
wxiaoguang Oct 26, 2025
e2f4663
clean up
wxiaoguang Oct 26, 2025
cf6c1c5
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 129 additions & 16 deletions modules/git/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,153 @@ import (
"context"
)

type Batch struct {
type batchCatFile struct {
cancel context.CancelFunc
Reader *bufio.Reader
Writer WriteCloserError
}

// NewBatch creates a new batch for the given repository, the Close must be invoked before release the batch
func NewBatch(ctx context.Context, repoPath string) (*Batch, error) {
func (b *batchCatFile) Close() {
if b.cancel != nil {
b.cancel()
b.Reader = nil
b.Writer = nil
b.cancel = nil
}
}

type Batch interface {
Write([]byte) (int, error)
WriteCheck([]byte) (int, error)
Reader() *bufio.Reader
CheckReader() *bufio.Reader
Close()
}

// batchCatFileWithCheck implements the Batch interface using the "cat-file --batch" command and "cat-file --batch-check" command
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch
// To align with --batch-command, we creates the two commands both at the same time if git version is lower than 2.36
type batchCatFileWithCheck struct {
ctx context.Context
repoPath string
batch *batchCatFile
batchCheck *batchCatFile
}

var _ Batch = &batchCatFileWithCheck{}

// newBatchCatFileWithCheck creates a new batch and a new batch check for the given repository, the Close must be invoked before release the batch
func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*batchCatFileWithCheck, error) {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := ensureValidGitRepository(ctx, repoPath); err != nil {
return nil, err
}

var batch Batch
batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repoPath)
return &batch, nil
return &batchCatFileWithCheck{
ctx: ctx,
repoPath: repoPath,
}, nil
}

func (b *batchCatFileWithCheck) getBatch() *batchCatFile {
if b.batch != nil {
return b.batch
}
b.batch = newCatFileBatch(b.ctx, b.repoPath, "--batch")
return b.batch
}

func (b *batchCatFileWithCheck) getBatchCheck() *batchCatFile {
if b.batchCheck != nil {
return b.batchCheck
}
b.batchCheck = newCatFileBatch(b.ctx, b.repoPath, "--batch-check")
return b.batchCheck
}

func (b *batchCatFileWithCheck) Write(bs []byte) (int, error) {
return b.getBatch().Writer.Write(bs)
}

func (b *batchCatFileWithCheck) WriteCheck(bs []byte) (int, error) {
return b.getBatchCheck().Writer.Write(bs)
}

func (b *batchCatFileWithCheck) Reader() *bufio.Reader {
return b.getBatch().Reader
}

func (b *batchCatFileWithCheck) CheckReader() *bufio.Reader {
return b.getBatchCheck().Reader
}

func NewBatchCheck(ctx context.Context, repoPath string) (*Batch, error) {
func (b *batchCatFileWithCheck) Close() {
if b.batch != nil {
b.batch.Close()
b.batch = nil
}
if b.batchCheck != nil {
b.batchCheck.Close()
b.batchCheck = nil
}
}

// batchCommandCatFile implements the Batch interface using the "cat-file --batch-command" command
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch-command
type batchCommandCatFile struct {
ctx context.Context
repoPath string
batch *batchCatFile
}

var _ Batch = &batchCommandCatFile{}

func newBatchCommandCatFile(ctx context.Context, repoPath string) (*batchCommandCatFile, error) {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := ensureValidGitRepository(ctx, repoPath); err != nil {
return nil, err
}

var check Batch
check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath)
return &check, nil
return &batchCommandCatFile{
ctx: ctx,
repoPath: repoPath,
}, nil
}

func (b *Batch) Close() {
if b.cancel != nil {
b.cancel()
b.Reader = nil
b.Writer = nil
b.cancel = nil
func (b *batchCommandCatFile) getBatch() *batchCatFile {
if b.batch != nil {
return b.batch
}
b.batch = newCatFileBatch(b.ctx, b.repoPath, "--batch-command")
return b.batch
}

func (b *batchCommandCatFile) Write(bs []byte) (int, error) {
return b.getBatch().Writer.Write(append([]byte("contents "), bs...))
}

func (b *batchCommandCatFile) WriteCheck(bs []byte) (int, error) {
return b.getBatch().Writer.Write(append([]byte("info "), bs...))
}

func (b *batchCommandCatFile) Reader() *bufio.Reader {
return b.getBatch().Reader
}

func (b *batchCommandCatFile) CheckReader() *bufio.Reader {
return b.getBatch().Reader
}

func (b *batchCommandCatFile) Close() {
if b.batch != nil {
b.batch.Close()
b.batch = nil
}
}

func NewBatch(ctx context.Context, repoPath string) (Batch, error) {
if DefaultFeatures().SupportCatFileBatchCommand {
return newBatchCommandCatFile(ctx, repoPath)
}
return newBatchCatFileWithCheck(ctx, repoPath)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it is not the first time that I told you that you must have correct tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests added at db4b7e4

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I told you to add test but didn't tell you to copy paste unrelated code everywhere.

You need to clearly test NewBatch, but not keeping polluting other tests

}
58 changes: 12 additions & 46 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,52 +40,13 @@ func ensureValidGitRepository(ctx context.Context, repoPath string) error {
return nil
}

// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
ctx, ctxCancel := context.WithCancel(ctx)
closed := make(chan struct{})
cancel := func() {
ctxCancel()
_ = batchStdoutReader.Close()
_ = batchStdinWriter.Close()
<-closed
// newCatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
// batchArg is the argument to pass to cat-file --batch, it could be "--batch", "--batch-command" or "--batch-check".
func newCatFileBatch(ctx context.Context, repoPath, batchArg string) *batchCatFile {
if batchArg != "--batch" && batchArg != "--batch-command" && batchArg != "--batch-check" {
panic("invalid batchArg: " + batchArg)
}

// Ensure cancel is called as soon as the provided context is cancelled
go func() {
<-ctx.Done()
cancel()
}()

go func() {
stderr := strings.Builder{}
err := gitcmd.NewCommand("cat-file", "--batch-check").
WithDir(repoPath).
WithStdin(batchStdinReader).
WithStdout(batchStdoutWriter).
WithStderr(&stderr).
WithUseContextTimeout(true).
Run(ctx)
if err != nil {
_ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
_ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
} else {
_ = batchStdoutWriter.Close()
_ = batchStdinReader.Close()
}
close(closed)
}()

// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
batchReader := bufio.NewReader(batchStdoutReader)

return batchStdinWriter, batchReader, cancel
}

// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
Expand All @@ -107,7 +68,8 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi

go func() {
stderr := strings.Builder{}
err := gitcmd.NewCommand("cat-file", "--batch").
err := gitcmd.NewCommand("cat-file").
AddArguments(gitcmd.ToTrustCmdArg(batchArg)).
WithDir(repoPath).
WithStdin(batchStdinReader).
WithStdout(batchStdoutWriter).
Expand All @@ -127,7 +89,11 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)

return batchStdinWriter, batchReader, cancel
return &batchCatFile{
Writer: batchStdinWriter,
Reader: batchReader,
cancel: cancel,
}
}

// ReadBatchLine reads the header line from cat-file --batch
Expand Down
11 changes: 6 additions & 5 deletions modules/git/blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@ type Blob struct {
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
return nil, err
}

_, err = wr.Write([]byte(b.ID.String() + "\n"))
_, err = batch.Write([]byte(b.ID.String() + "\n"))
if err != nil {
cancel()
return nil, err
}
rd := batch.Reader()
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
Expand Down Expand Up @@ -67,18 +68,18 @@ func (b *Blob) Size() int64 {
return b.size
}

wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
defer cancel()
_, err = wr.Write([]byte(b.ID.String() + "\n"))
_, err = batch.WriteCheck([]byte(b.ID.String() + "\n"))
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
_, _, b.size, err = ReadBatchLine(rd)
_, _, b.size, err = ReadBatchLine(batch.CheckReader())
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
Expand Down
12 changes: 7 additions & 5 deletions modules/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ const RequiredVersion = "2.0.0" // the minimum Git version required
type Features struct {
gitVersion *version.Version

UsingGogit bool
SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
UsingGogit bool
SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
SupportCatFileBatchCommand bool // >= 2.36, support `git cat-file --batch-command`
}

var defaultFeatures *Features
Expand Down Expand Up @@ -75,6 +76,7 @@ func loadGitVersionFeatures() (*Features, error) {
features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat)
}
features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40")
features.SupportCatFileBatchCommand = features.CheckVersionAtLeast("2.36")
return features, nil
}

Expand Down
6 changes: 6 additions & 0 deletions modules/git/gitcmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ func (c *Command) AddConfig(key, value string) *Command {
return c
}

// ToTrustCmdArg converts a string (trusted as argument) to CmdArg
// In most cases, it shouldn't be used. Use AddXxx function instead
func ToTrustCmdArg(arg string) internal.CmdArg {
return internal.CmdArg(arg)
}

// ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs
// In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead
func ToTrustedCmdArgs(args []string) TrustedCmdArgs {
Expand Down
5 changes: 3 additions & 2 deletions modules/git/languagestats/language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,21 @@ import (
func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx)
batch, cancel, err := repo.CatFileBatch(repo.Ctx)
if err != nil {
return nil, err
}
defer cancel()

writeID := func(id string) error {
_, err := batchStdinWriter.Write([]byte(id + "\n"))
_, err := batch.Write([]byte(id + "\n"))
return err
}

if err := writeID(commitID); err != nil {
return nil, err
}
batchReader := batch.Reader()
shaBytes, typ, size, err := git.ReadBatchLine(batchReader)
if typ != "commit" {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
Expand Down
Loading