Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2abef73
Refactor CatFile batch implementation and introduce batch-command for…
lunny Jun 8, 2025
bb8f1cf
improvements
lunny Jun 8, 2025
db4813a
Fix command arg
lunny Jun 8, 2025
334eb9e
Fix linkt error
lunny Jun 8, 2025
2454ab6
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Jun 18, 2025
af97cab
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 2, 2025
646c509
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 5, 2025
c7b3bf3
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Sep 29, 2025
d364bd6
Merge branch 'lunny/catfile_batch_refactor' of github.com:lunny/gitea…
lunny Sep 29, 2025
2166634
some improvements
lunny Sep 29, 2025
3242f31
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 9, 2025
307a6c7
Delay to create the batch go routine
lunny Oct 9, 2025
824e04b
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 18, 2025
825c48d
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 18, 2025
efc4d43
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 20, 2025
db4b7e4
Add test for BatchCommand
lunny Oct 22, 2025
b39e2f2
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 22, 2025
a8e8890
Merge branch 'lunny/catfile_batch_refactor' of github.com:lunny/gitea…
lunny Oct 22, 2025
2f39de1
adjust batch argument
lunny Oct 22, 2025
87e12a1
Remove unnecessary added test repo
lunny Oct 22, 2025
ea16a3e
Fix lint and test
lunny Oct 22, 2025
a885f07
Merge branch 'main' into lunny/catfile_batch_refactor
lunny Oct 25, 2025
4c6d91a
refactor
wxiaoguang Oct 26, 2025
872918f
refactor
wxiaoguang Oct 26, 2025
5dde21a
refactor
wxiaoguang Oct 26, 2025
832f5e4
refactor
wxiaoguang Oct 26, 2025
e2f4663
clean up
wxiaoguang Oct 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 1 addition & 27 deletions modules/git/attribute/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,11 @@
package attribute

import (
"fmt"
"os"
"testing"

"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
)

// testRun points git at a temporary home directory, initializes the git module and runs
// the package's tests. A non-zero exit code from the test run is reported as an error.
func testRun(m *testing.M) error {
	homeDir, err := os.MkdirTemp(os.TempDir(), "git-home")
	if err != nil {
		return fmt.Errorf("unable to create temp dir: %w", err)
	}
	// the temporary git home is removed once the run is over
	defer util.RemoveAll(homeDir)

	setting.Git.HomePath = homeDir
	err = git.InitFull()
	if err != nil {
		return fmt.Errorf("failed to call Init: %w", err)
	}

	if code := m.Run(); code != 0 {
		return fmt.Errorf("run test failed, ExitCode=%d", code)
	}
	return nil
}

func TestMain(m *testing.M) {
if err := testRun(m); err != nil {
_, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err)
os.Exit(1)
}
git.RunGitTests(m)
}
41 changes: 12 additions & 29 deletions modules/git/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,23 @@ import (
"context"
)

type Batch struct {
cancel context.CancelFunc
Reader *bufio.Reader
Writer WriteCloserError
// CatFileBatchQueryContent is the content-only part of a cat-file batch session:
// a query is written with QueryContent and its response is then read from ContentReader.
type CatFileBatchQueryContent interface {
	// QueryContent sends the query bytes to the batch process and reports the result of that write.
	QueryContent(query []byte) (n int, err error)
	// ContentReader returns the buffered reader from which the responses to QueryContent are read.
	ContentReader() *bufio.Reader
}

// NewBatch starts a "cat-file --batch" session for the repository at repoPath.
// Close must be invoked on the returned batch before it is released.
func NewBatch(ctx context.Context, repoPath string) (*Batch, error) {
	// git cat-file does not fail immediately when run outside a valid git directory,
	// so the repository is validated with git rev-parse first.
	err := ensureValidGitRepository(ctx, repoPath)
	if err != nil {
		return nil, err
	}

	w, r, cancel := catFileBatch(ctx, repoPath)
	return &Batch{Writer: w, Reader: r, cancel: cancel}, nil
}
type CatFileBatch interface {
CatFileBatchQueryContent

func NewBatchCheck(ctx context.Context, repoPath string) (*Batch, error) {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := ensureValidGitRepository(ctx, repoPath); err != nil {
return nil, err
}
QueryInfo([]byte) (int, error)
InfoReader() *bufio.Reader

var check Batch
check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repoPath)
return &check, nil
Close()
}

func (b *Batch) Close() {
if b.cancel != nil {
b.cancel()
b.Reader = nil
b.Writer = nil
b.cancel = nil
// NewBatch creates a CatFileBatch for the repository at repoPath. Close must be invoked
// on the returned value before it is released.
//
// It uses "cat-file --batch-command" when DefaultFeatures reports support for it and
// falls back to the legacy "--batch" / "--batch-check" implementation otherwise.
//
// On failure the returned CatFileBatch is a literal nil. The constructors return concrete
// pointer types, and passing a nil pointer straight through would wrap it in a non-nil
// interface value, so a caller's "batch != nil" check would wrongly succeed.
func NewBatch(ctx context.Context, repoPath string) (CatFileBatch, error) {
	if DefaultFeatures().SupportCatFileBatchCommand {
		b, err := newBatchCommandCatFile(ctx, repoPath)
		if err != nil {
			return nil, err
		}
		return b, nil
	}

	b, err := newBatchCatFileWithCheck(ctx, repoPath)
	if err != nil {
		return nil, err
	}
	return b, nil
}
63 changes: 63 additions & 0 deletions modules/git/batch_command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"context"

"code.gitea.io/gitea/modules/git/gitcmd"
)

// catFileBatchCommand is the CatFileBatch implementation backed by a single
// "cat-file --batch-command" process, for git version >= 2.36.
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch-command
type catFileBatchCommand struct {
	// ctx and repoPath are kept so that getBatch can start the process on first use
	ctx      context.Context
	repoPath string

	// batch is nil until getBatch has been called, and nil again after Close
	batch *catFileBatchCommunicator
}

// compile-time check that the CatFileBatch interface is satisfied
var _ CatFileBatch = (*catFileBatchCommand)(nil)

// newBatchCommandCatFile validates the repository at repoPath and returns a catFileBatchCommand for it.
// No cat-file process is started here; that happens on the first query or reader access.
func newBatchCommandCatFile(ctx context.Context, repoPath string) (*catFileBatchCommand, error) {
	err := ensureValidGitRepository(ctx, repoPath)
	if err != nil {
		return nil, err
	}

	b := new(catFileBatchCommand)
	b.ctx, b.repoPath = ctx, repoPath
	return b, nil
}

// getBatch returns the communicator of the "cat-file --batch-command" process,
// starting that process the first time it is needed.
func (b *catFileBatchCommand) getBatch() *catFileBatchCommunicator {
	if b.batch == nil {
		cmd := gitcmd.NewCommand("cat-file", "--batch-command")
		b.batch = newCatFileBatch(b.ctx, b.repoPath, cmd)
	}
	return b.batch
}

// QueryContent requests the contents of the object named by bs by writing "contents <bs>"
// to the batch-command process. bs is written verbatim after the prefix.
//
// The returned count covers only the bytes of bs that were written. The internal prefix
// is not counted, so the result stays within [0, len(bs)] and agrees with
// catFileBatchLegacy.QueryContent, which writes bs without any prefix.
func (b *catFileBatchCommand) QueryContent(bs []byte) (int, error) {
	const prefix = "contents "

	req := make([]byte, 0, len(prefix)+len(bs))
	req = append(req, prefix...)
	req = append(req, bs...)

	n, err := b.getBatch().writer.Write(req)
	// a short write may end inside the prefix, never report a negative count
	n -= len(prefix)
	if n < 0 {
		n = 0
	}
	return n, err
}

// QueryInfo requests the info of the object named by bs by writing "info <bs>"
// to the batch-command process. bs is written verbatim after the prefix.
//
// The returned count covers only the bytes of bs that were written. The internal prefix
// is not counted, so the result stays within [0, len(bs)] and agrees with
// catFileBatchLegacy.QueryInfo, which writes bs without any prefix.
func (b *catFileBatchCommand) QueryInfo(bs []byte) (int, error) {
	const prefix = "info "

	req := make([]byte, 0, len(prefix)+len(bs))
	req = append(req, prefix...)
	req = append(req, bs...)

	n, err := b.getBatch().writer.Write(req)
	// a short write may end inside the prefix, never report a negative count
	n -= len(prefix)
	if n < 0 {
		n = 0
	}
	return n, err
}

// ContentReader returns the reader of the batch-command process, starting the process if needed.
// It is the same reader that InfoReader returns, because one process answers both kinds of query.
func (b *catFileBatchCommand) ContentReader() *bufio.Reader {
	comm := b.getBatch()
	return comm.reader
}

// InfoReader returns the reader of the batch-command process, starting the process if needed.
// It is the same reader that ContentReader returns, because one process answers both kinds of query.
func (b *catFileBatchCommand) InfoReader() *bufio.Reader {
	comm := b.getBatch()
	return comm.reader
}

// Close shuts down the batch-command communicator if one was started and forgets it.
// Calling Close when nothing was started is a no-op.
func (b *catFileBatchCommand) Close() {
	if b.batch == nil {
		return
	}
	b.batch.Close()
	b.batch = nil
}
79 changes: 79 additions & 0 deletions modules/git/batch_legacy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"context"

"code.gitea.io/gitea/modules/git/gitcmd"
)

// catFileBatchLegacy is the CatFileBatch implementation for git version < 2.36.
// To align with "--batch-command", it uses two separate commands: "cat-file --batch" for
// object contents and "cat-file --batch-check" for object info.
// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch
type catFileBatchLegacy struct {
	// ctx and repoPath are kept so that the commands can be started on first use
	ctx      context.Context
	repoPath string

	// batch serves content queries, nil until getBatch has been called
	batch *catFileBatchCommunicator
	// batchCheck serves info queries, nil until getBatchCheck has been called
	batchCheck *catFileBatchCommunicator
}

// compile-time check that the CatFileBatch interface is satisfied
var _ CatFileBatch = (*catFileBatchLegacy)(nil)

// newBatchCatFileWithCheck validates the repository at repoPath and returns a catFileBatchLegacy for it.
// The "--batch" and "--batch-check" commands are only started on first use.
// Close must be invoked before the returned value is released.
func newBatchCatFileWithCheck(ctx context.Context, repoPath string) (*catFileBatchLegacy, error) {
	err := ensureValidGitRepository(ctx, repoPath)
	if err != nil {
		return nil, err
	}

	b := new(catFileBatchLegacy)
	b.ctx, b.repoPath = ctx, repoPath
	return b, nil
}

// getBatch returns the communicator of the "cat-file --batch" command,
// starting that command the first time it is needed.
func (b *catFileBatchLegacy) getBatch() *catFileBatchCommunicator {
	if b.batch == nil {
		cmd := gitcmd.NewCommand("cat-file", "--batch")
		b.batch = newCatFileBatch(b.ctx, b.repoPath, cmd)
	}
	return b.batch
}

// getBatchCheck returns the communicator of the "cat-file --batch-check" command,
// starting that command the first time it is needed.
func (b *catFileBatchLegacy) getBatchCheck() *catFileBatchCommunicator {
	if b.batchCheck == nil {
		cmd := gitcmd.NewCommand("cat-file", "--batch-check")
		b.batchCheck = newCatFileBatch(b.ctx, b.repoPath, cmd)
	}
	return b.batchCheck
}

// QueryContent writes bs unchanged to the "cat-file --batch" command and returns the result of that write.
func (b *catFileBatchLegacy) QueryContent(bs []byte) (int, error) {
	w := b.getBatch().writer
	return w.Write(bs)
}

// QueryInfo writes bs unchanged to the "cat-file --batch-check" command and returns the result of that write.
func (b *catFileBatchLegacy) QueryInfo(bs []byte) (int, error) {
	w := b.getBatchCheck().writer
	return w.Write(bs)
}

// ContentReader returns the reader of the "cat-file --batch" command, starting the command if needed.
func (b *catFileBatchLegacy) ContentReader() *bufio.Reader {
	comm := b.getBatch()
	return comm.reader
}

// InfoReader returns the reader of the "cat-file --batch-check" command, starting the command if needed.
func (b *catFileBatchLegacy) InfoReader() *bufio.Reader {
	comm := b.getBatchCheck()
	return comm.reader
}

// Close shuts down whichever of the two communicators were started and forgets them.
// The content communicator is closed before the info communicator.
func (b *catFileBatchLegacy) Close() {
	if comm := b.batch; comm != nil {
		comm.Close()
		b.batch = nil
	}
	if comm := b.batchCheck; comm != nil {
		comm.Close()
		b.batchCheck = nil
	}
}
84 changes: 31 additions & 53 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,33 @@ import (
"github.com/djherbis/nio/v3"
)

// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
type WriteCloserError interface {
// writeCloserError wraps an io.WriteCloser with an additional CloseWithError function (for nio.Pipe)
type writeCloserError interface {
io.WriteCloser
CloseWithError(err error) error
}

// catFileBatchCommunicator bundles the handles used to talk to one cat-file process.
type catFileBatchCommunicator struct {
	// cancel is invoked by Close; a nil cancel marks the communicator as already closed
	cancel context.CancelFunc
	// reader is the buffered reader the process responses are read from
	reader *bufio.Reader
	// writer is where queries for the process are written
	writer writeCloserError
}

// Close invokes the cancel function and then clears the reader, writer and cancel fields.
// It does nothing when cancel is already nil, so a second call is a no-op.
func (b *catFileBatchCommunicator) Close() {
	if b.cancel == nil {
		return
	}
	b.cancel()
	b.reader, b.writer, b.cancel = nil, nil, nil
}

// ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
// Run before opening git cat-file.
// This is needed otherwise the git cat-file will hang for invalid repositories.
// FIXME: the comment is from https://github.com/go-gitea/gitea/pull/17991 but it doesn't seem to be true.
// The real problem is that Golang's Cmd.Wait hangs because it waits for the pipes to be closed, but we can't close the pipes before Wait returns
// Need to refactor to use StdinPipe and StdoutPipe
func ensureValidGitRepository(ctx context.Context, repoPath string) error {
stderr := strings.Builder{}
err := gitcmd.NewCommand("rev-parse").
Expand All @@ -40,53 +58,9 @@ func ensureValidGitRepository(ctx context.Context, repoPath string) error {
return nil
}

// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
//
// The returned writer is the write end of the pipe that feeds the command's stdin, and the returned
// reader is a bufio.Reader over the read end of the pipe that receives the command's stdout.
// The returned cancel function cancels the derived context, closes both caller-facing pipe ends and
// then blocks until the goroutine running the command has finished.
func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// one pipe per direction: the command reads batchStdinReader and writes batchStdoutWriter
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
// derive a cancellable context so the command can be stopped independently of the caller's context
ctx, ctxCancel := context.WithCancel(ctx)
// closed is closed by the command goroutine after Run has returned and its pipe ends are closed
closed := make(chan struct{})
cancel := func() {
ctxCancel()
_ = batchStdoutReader.Close()
_ = batchStdinWriter.Close()
// wait for the command goroutine to finish
<-closed
}

// Ensure cancel is called as soon as the provided context is cancelled
// (ctx here is the derived context, so this goroutine also runs after cancel itself was invoked)
go func() {
<-ctx.Done()
cancel()
}()

// run the command in the background, wired to the command-side ends of the two pipes
go func() {
stderr := strings.Builder{}
err := gitcmd.NewCommand("cat-file", "--batch-check").
WithDir(repoPath).
WithStdin(batchStdinReader).
WithStdout(batchStdoutWriter).
WithStderr(&stderr).
WithUseContextTimeout(true).
Run(ctx)
if err != nil {
// hand the failure, combined with the captured stderr, to whoever uses the other pipe ends
_ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
_ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String()))
} else {
_ = batchStdoutWriter.Close()
_ = batchStdinReader.Close()
}
// signal cancel that the command has finished
close(closed)
}()

// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
batchReader := bufio.NewReader(batchStdoutReader)

return batchStdinWriter, batchReader, cancel
}

// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// newCatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func newCatFileBatch(ctx context.Context, repoPath string, cmdCatFile *gitcmd.Command) *catFileBatchCommunicator {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and subtrees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
Expand All @@ -107,7 +81,7 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi

go func() {
stderr := strings.Builder{}
err := gitcmd.NewCommand("cat-file", "--batch").
err := cmdCatFile.
WithDir(repoPath).
WithStdin(batchStdinReader).
WithStdout(batchStdoutWriter).
Expand All @@ -124,10 +98,14 @@ func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufi
close(closed)
}()

// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
// use a buffered reader to read from the cat-file --batch (StringReader.ReadString)
batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)

return batchStdinWriter, batchReader, cancel
return &catFileBatchCommunicator{
writer: batchStdinWriter,
reader: batchReader,
cancel: cancel,
}
}

// ReadBatchLine reads the header line from cat-file --batch
Expand Down Expand Up @@ -225,7 +203,7 @@ headerLoop:
// constant hextable to help quickly convert between binary and hex representation
const hextable = "0123456789abcdef"

// BinToHexHeash converts a binary Hash into a hex encoded one. Input and output can be the
// BinToHex converts a binary Hash into a hex encoded one. Input and output can be the
// same byte slice to support in place conversion without allocations.
// This is at least 100x quicker than hex.EncodeToString
func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
Expand Down
Loading
Loading