Skip to content

Commit d17784e

Browse files
committed
Introduce a new batch cat-file implementation in preparation for replacing the old one
1 parent dcbf6c2 commit d17784e

File tree

6 files changed

+287
-23
lines changed

6 files changed

+287
-23
lines changed

modules/git/batch_cat_file.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// Copyright 2024 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
7+
"bufio"
8+
"bytes"
9+
"context"
10+
"fmt"
11+
"io"
12+
"os"
13+
"os/exec"
14+
"strings"
15+
"time"
16+
17+
"code.gitea.io/gitea/modules/log"
18+
"code.gitea.io/gitea/modules/process"
19+
"code.gitea.io/gitea/modules/util"
20+
)
21+
22+
type BatchCatFile struct {
23+
cmd *exec.Cmd
24+
startTime time.Time
25+
stdin io.WriteCloser
26+
stdout io.ReadCloser
27+
cancel context.CancelFunc
28+
finished process.FinishedFunc
29+
}
30+
31+
func NewBatchCatFile(ctx context.Context, repoPath string) (*BatchCatFile, error) {
32+
callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + 1 /* parent */)
33+
if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 {
34+
callerInfo = callerInfo[pos+1:]
35+
}
36+
37+
a := make([]string, 0, 4)
38+
a = append(a, debugQuote(GitExecutable))
39+
if len(globalCommandArgs) > 0 {
40+
a = append(a, "...global...")
41+
}
42+
a = append(a, "cat-file", "--batch")
43+
cmdLogString := strings.Join(a, " ")
44+
45+
// these logs are for debugging purposes only, so no guarantee of correctness or stability
46+
desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(repoPath), cmdLogString)
47+
log.Debug("git.BatchCatFile: %s", desc)
48+
49+
ctx, cancel, finished := process.GetManager().AddContext(ctx, desc)
50+
51+
args := make([]string, 0, len(globalCommandArgs)+2)
52+
for _, arg := range globalCommandArgs {
53+
args = append(args, string(arg))
54+
}
55+
args = append(args, "cat-file", "--batch")
56+
cmd := exec.CommandContext(ctx, GitExecutable, args...)
57+
cmd.Env = append(os.Environ(), CommonGitCmdEnvs()...)
58+
cmd.Dir = repoPath
59+
process.SetSysProcAttribute(cmd)
60+
61+
stdin, err := cmd.StdinPipe()
62+
if err != nil {
63+
return nil, err
64+
}
65+
stdout, err := cmd.StdoutPipe()
66+
if err != nil {
67+
return nil, err
68+
}
69+
70+
if err := cmd.Start(); err != nil {
71+
return nil, err
72+
}
73+
74+
return &BatchCatFile{
75+
cmd: cmd,
76+
startTime: time.Now(),
77+
stdin: stdin,
78+
stdout: stdout,
79+
cancel: cancel,
80+
finished: finished,
81+
}, nil
82+
}
83+
84+
func (b *BatchCatFile) Input(refs ...string) error {
85+
var buf bytes.Buffer
86+
for _, ref := range refs {
87+
if _, err := buf.WriteString(ref + "\n"); err != nil {
88+
return err
89+
}
90+
}
91+
92+
_, err := b.stdin.Write(buf.Bytes())
93+
if err != nil {
94+
return err
95+
}
96+
97+
return nil
98+
}
99+
100+
func (b *BatchCatFile) Reader() *bufio.Reader {
101+
return bufio.NewReader(b.stdout)
102+
}
103+
104+
func (b *BatchCatFile) Escaped() time.Duration {
105+
return time.Since(b.startTime)
106+
}
107+
108+
func (b *BatchCatFile) Cancel() {
109+
b.cancel()
110+
}
111+
112+
func (b *BatchCatFile) Close() error {
113+
b.finished()
114+
_ = b.stdin.Close()
115+
log.Debug("git.BatchCatFile: %v", b.Escaped())
116+
return b.cmd.Wait()
117+
}

modules/git/batch_cat_file_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package git
5+
6+
import (
7+
"context"
8+
"io"
9+
"path/filepath"
10+
"testing"
11+
"time"
12+
13+
"github.com/stretchr/testify/assert"
14+
)
15+
16+
func Test_GitBatchOperatorsNormal(t *testing.T) {
17+
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
18+
batch, err := NewBatchCatFile(context.Background(), bareRepo1Path)
19+
assert.NoError(t, err)
20+
assert.NotNil(t, batch)
21+
defer batch.Close()
22+
23+
err = batch.Input("refs/heads/master")
24+
assert.NoError(t, err)
25+
rd := batch.Reader()
26+
assert.NotNil(t, rd)
27+
28+
_, typ, size, err := ReadBatchLine(rd)
29+
assert.NoError(t, err)
30+
assert.Equal(t, "commit", typ)
31+
assert.Equal(t, int64(1075), size)
32+
33+
// this step is very important, otherwise the next read will be wrong
34+
s, err := rd.Discard(int(size))
35+
assert.NoError(t, err)
36+
assert.EqualValues(t, size, s)
37+
38+
err = batch.Input("ce064814f4a0d337b333e646ece456cd39fab612")
39+
assert.NoError(t, err)
40+
assert.NotNil(t, rd)
41+
42+
_, typ, size, err = ReadBatchLine(rd)
43+
assert.NoError(t, err)
44+
assert.Equal(t, "commit", typ)
45+
assert.Equal(t, int64(1075), size)
46+
47+
s, err = rd.Discard(int(size))
48+
assert.NoError(t, err)
49+
assert.EqualValues(t, size, s)
50+
51+
kases := []struct {
52+
refname string
53+
size int64
54+
}{
55+
{"refs/heads/master", 1075},
56+
{"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", 1074},
57+
{"37991dec2c8e592043f47155ce4808d4580f9123", 239},
58+
}
59+
60+
var inputs []string
61+
for _, kase := range kases {
62+
inputs = append(inputs, kase.refname)
63+
}
64+
65+
// input once for 3 refs
66+
err = batch.Input(inputs...)
67+
assert.NoError(t, err)
68+
assert.NotNil(t, rd)
69+
70+
for i := 0; i < 3; i++ {
71+
_, typ, size, err = ReadBatchLine(rd)
72+
assert.NoError(t, err)
73+
assert.Equal(t, "commit", typ)
74+
assert.Equal(t, kases[i].size, size)
75+
76+
s, err := rd.Discard(int(size))
77+
assert.NoError(t, err)
78+
assert.EqualValues(t, size, s)
79+
}
80+
81+
// input 3 times
82+
for _, input := range inputs {
83+
err = batch.Input(input)
84+
assert.NoError(t, err)
85+
assert.NotNil(t, rd)
86+
}
87+
88+
for i := 0; i < 3; i++ {
89+
_, typ, size, err = ReadBatchLine(rd)
90+
assert.NoError(t, err)
91+
assert.Equal(t, "commit", typ)
92+
assert.Equal(t, kases[i].size, size)
93+
94+
s, err := rd.Discard(int(size))
95+
assert.NoError(t, err)
96+
assert.EqualValues(t, size, s)
97+
}
98+
}
99+
100+
func Test_GitBatchOperatorsCancel(t *testing.T) {
101+
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
102+
batch, err := NewBatchCatFile(context.Background(), bareRepo1Path)
103+
assert.NoError(t, err)
104+
assert.NotNil(t, batch)
105+
defer batch.Close()
106+
107+
err = batch.Input("refs/heads/master")
108+
assert.NoError(t, err)
109+
rd := batch.Reader()
110+
assert.NotNil(t, rd)
111+
112+
_, typ, size, err := ReadBatchLine(rd)
113+
assert.NoError(t, err)
114+
assert.Equal(t, "commit", typ)
115+
assert.Equal(t, int64(1075), size)
116+
117+
go func() {
118+
time.Sleep(time.Second)
119+
batch.Cancel()
120+
}()
121+
// block here to wait cancel
122+
_, err = io.ReadAll(rd)
123+
assert.NoError(t, err)
124+
}
125+
126+
func Test_GitBatchOperatorsTimeout(t *testing.T) {
127+
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
128+
129+
ctx, _ := context.WithTimeout(context.Background(), 1*time.Second)
130+
131+
batch, err := NewBatchCatFile(ctx, bareRepo1Path)
132+
assert.NoError(t, err)
133+
assert.NotNil(t, batch)
134+
defer batch.Close()
135+
136+
err = batch.Input("refs/heads/master")
137+
assert.NoError(t, err)
138+
rd := batch.Reader()
139+
assert.NotNil(t, rd)
140+
141+
_, typ, size, err := ReadBatchLine(rd)
142+
assert.NoError(t, err)
143+
assert.Equal(t, "commit", typ)
144+
assert.Equal(t, int64(1075), size)
145+
// block here until timeout
146+
_, err = io.ReadAll(rd)
147+
assert.NoError(t, err)
148+
}

modules/git/batch_reader.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er
165165
typ = typ[:idx]
166166

167167
size, err = strconv.ParseInt(sizeStr, 10, 64)
168+
168169
return sha, typ, size, err
169170
}
170171

modules/git/command.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,16 @@ func logArgSanitize(arg string) string {
6060
return arg
6161
}
6262

63+
// debugQuote formats s for use in a debug log line: if s contains a space, a
// backquote, a single or double quote, a tab, a carriage return or a newline,
// it is returned as a Go-quoted string (%q); otherwise it is returned as is.
//
// The result is meant for humans reading logs only; it is not a shell-safe
// quoting.
func debugQuote(s string) string {
	if strings.ContainsAny(s, " `'\"\t\r\n") {
		return fmt.Sprintf("%q", s)
	}
	return s
}
69+
6370
func (c *Command) LogString() string {
6471
// WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space),
6572
// It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here.
66-
debugQuote := func(s string) string {
67-
if strings.ContainsAny(s, " `'\"\t\r\n") {
68-
return fmt.Sprintf("%q", s)
69-
}
70-
return s
71-
}
7273
a := make([]string, 0, len(c.args)+1)
7374
a = append(a, debugQuote(c.prog))
7475
if c.globalArgsLength > 0 {

modules/git/commit_info_nogogit.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,13 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string,
124124
return nil, err
125125
}
126126

127-
batchStdinWriter, batchReader, cancel, err := commit.repo.CatFileBatch(ctx)
127+
batch, err := NewBatchCatFile(ctx, commit.repo.Path)
128128
if err != nil {
129129
return nil, err
130130
}
131-
defer cancel()
131+
defer batch.Close()
132+
133+
rd := batch.Reader()
132134

133135
commitsMap := map[string]*Commit{}
134136
commitsMap[commit.ID.String()] = commit
@@ -145,25 +147,24 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string,
145147
continue
146148
}
147149

148-
_, err := batchStdinWriter.Write([]byte(commitID + "\n"))
149-
if err != nil {
150+
if err := batch.Input(commitID); err != nil {
150151
return nil, err
151152
}
152-
_, typ, size, err := ReadBatchLine(batchReader)
153+
_, typ, size, err := ReadBatchLine(rd)
153154
if err != nil {
154155
return nil, err
155156
}
156157
if typ != "commit" {
157-
if err := DiscardFull(batchReader, size+1); err != nil {
158+
if err := DiscardFull(rd, size+1); err != nil {
158159
return nil, err
159160
}
160161
return nil, fmt.Errorf("unexpected type: %s for commit id: %s", typ, commitID)
161162
}
162-
c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(batchReader, size))
163+
c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(rd, size))
163164
if err != nil {
164165
return nil, err
165166
}
166-
if _, err := batchReader.Discard(1); err != nil {
167+
if _, err := rd.Discard(1); err != nil {
167168
return nil, err
168169
}
169170
commitCommits[path] = c

modules/git/repo_language_stats_nogogit.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,16 @@ import (
2020
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
2121
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
2222
// so let's create a batch stdin and stdout
23-
batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx)
23+
batch, err := NewBatchCatFile(repo.Ctx, repo.Path)
2424
if err != nil {
2525
return nil, err
2626
}
27-
defer cancel()
27+
defer batch.Close()
2828

29-
writeID := func(id string) error {
30-
_, err := batchStdinWriter.Write([]byte(id + "\n"))
31-
return err
32-
}
33-
34-
if err := writeID(commitID); err != nil {
29+
if err := batch.Input(commitID); err != nil {
3530
return nil, err
3631
}
32+
batchReader := batch.Reader()
3733
shaBytes, typ, size, err := ReadBatchLine(batchReader)
3834
if typ != "commit" {
3935
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
@@ -146,7 +142,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
146142
// If content can not be read or file is too big just do detection by filename
147143

148144
if f.Size() <= bigFileSize {
149-
if err := writeID(f.ID.String()); err != nil {
145+
if err := batch.Input(f.ID.String()); err != nil {
150146
return nil, err
151147
}
152148
_, _, size, err := ReadBatchLine(batchReader)

0 commit comments

Comments
 (0)