@@ -2,128 +2,147 @@ package git
2
2
3
3
import (
4
4
"bufio"
5
+ "context"
5
6
"fmt"
6
7
"io"
7
- "os"
8
- "os/exec "
8
+
9
+ "github.com/github/git-sizer/internal/pipe "
9
10
)
10
11
11
12
// ObjectIter iterates over objects in a Git repository.
12
13
type ObjectIter struct {
13
- cmd1 * exec.Cmd
14
- cmd2 * exec.Cmd
15
- out1 io.ReadCloser
16
- out2 io.ReadCloser
17
- f * bufio.Reader
18
- errChan <- chan error
14
+ ctx context.Context
15
+ p * pipe.Pipeline
16
+ oidCh chan OID
17
+ errCh chan error
18
+ headerCh chan BatchHeader
19
19
}
20
20
21
21
// NewObjectIter returns an iterator that iterates over objects in
22
22
// `repo`. The arguments are passed to `git rev-list --objects`. The
23
23
// second return value is the stdin of the `rev-list` command. The
24
24
// caller can feed values into it but must close it in any case.
25
- func (repo * Repository ) NewObjectIter (
26
- args ... string ,
27
- ) (* ObjectIter , io.WriteCloser , error ) {
28
- cmd1 := repo .GitCommand (append ([]string {"rev-list" , "--objects" }, args ... )... )
29
- in1 , err := cmd1 .StdinPipe ()
30
- if err != nil {
31
- return nil , nil , err
32
- }
33
-
34
- out1 , err := cmd1 .StdoutPipe ()
35
- if err != nil {
36
- return nil , nil , err
37
- }
38
-
39
- cmd1 .Stderr = os .Stderr
40
-
41
- err = cmd1 .Start ()
42
- if err != nil {
43
- return nil , nil , err
44
- }
45
-
46
- cmd2 := repo .GitCommand ("cat-file" , "--batch-check" , "--buffer" )
47
- in2 , err := cmd2 .StdinPipe ()
48
- if err != nil {
49
- out1 .Close ()
50
- cmd1 .Wait ()
51
- return nil , nil , err
52
- }
53
-
54
- out2 , err := cmd2 .StdoutPipe ()
55
- if err != nil {
56
- in2 .Close ()
57
- out1 .Close ()
58
- cmd1 .Wait ()
59
- return nil , nil , err
25
+ func (repo * Repository ) NewObjectIter (ctx context.Context , args ... string ) (* ObjectIter , error ) {
26
+ iter := ObjectIter {
27
+ ctx : ctx ,
28
+ p : pipe .New (),
29
+ oidCh : make (chan OID ),
30
+ errCh : make (chan error ),
31
+ headerCh : make (chan BatchHeader ),
60
32
}
61
33
62
- cmd2 .Stderr = os .Stderr
34
+ iter .p .Add (
35
+ // Read OIDs from `iter.oidCh` and write them to `git
36
+ // rev-list`:
37
+ pipe .Function (
38
+ "request-objects" ,
39
+ func (ctx context.Context , _ pipe.Env , _ io.Reader , stdout io.Writer ) error {
40
+ out := bufio .NewWriter (stdout )
41
+
42
+ for {
43
+ select {
44
+ case oid , ok := <- iter .oidCh :
45
+ if ! ok {
46
+ return out .Flush ()
47
+ }
48
+ if _ , err := fmt .Fprintln (out , oid .String ()); err != nil {
49
+ return fmt .Errorf ("writing to 'git cat-file': %w" , err )
50
+ }
51
+ case <- ctx .Done ():
52
+ return ctx .Err ()
53
+ }
54
+ }
55
+ },
56
+ ),
57
+
58
+ // Walk starting at the OIDs on `stdin` and output the OIDs
59
+ // (possibly followed by paths) of all of the Git objects
60
+ // found.
61
+ pipe .CommandStage (
62
+ "git-rev-list" ,
63
+ repo .GitCommand ("rev-list" , "--objects" , "--stdin" , "--date-order" ),
64
+ ),
65
+
66
+ // Read the output of `git rev-list --objects`, strip off any
67
+ // trailing information, and write the OIDs to `git cat-file`:
68
+ pipe .LinewiseFunction (
69
+ "copy-oids" ,
70
+ func (_ context.Context , _ pipe.Env , line []byte , stdout * bufio.Writer ) error {
71
+ if len (line ) < 40 {
72
+ return fmt .Errorf ("line too short: '%s'" , line )
73
+ }
74
+ if _ , err := stdout .Write (line [:40 ]); err != nil {
75
+ return fmt .Errorf ("writing OID to 'git cat-file': %w" , err )
76
+ }
77
+ if err := stdout .WriteByte ('\n' ); err != nil {
78
+ return fmt .Errorf ("writing LF to 'git cat-file': %w" , err )
79
+ }
80
+ return nil
81
+ },
82
+ ),
83
+
84
+ // Process the OIDs from stdin and, for each object, output a
85
+ // header:
86
+ pipe .CommandStage (
87
+ "git-cat-file" ,
88
+ repo .GitCommand ("cat-file" , "--batch-check" , "--buffer" ),
89
+ ),
90
+
91
+ // Parse the object headers and shove them into `headerCh`:
92
+ pipe .Function (
93
+ "object-parser" ,
94
+ func (ctx context.Context , _ pipe.Env , stdin io.Reader , _ io.Writer ) error {
95
+ defer close (iter .headerCh )
96
+
97
+ f := bufio .NewReader (stdin )
98
+
99
+ for {
100
+ header , err := f .ReadString ('\n' )
101
+ if err != nil {
102
+ if err == io .EOF {
103
+ return nil
104
+ }
105
+ return fmt .Errorf ("reading from 'git cat-file': %w" , err )
106
+ }
107
+ batchHeader , err := ParseBatchHeader ("" , header )
108
+ if err != nil {
109
+ return fmt .Errorf ("parsing output of 'git cat-file': %w" , err )
110
+ }
111
+
112
+ iter .headerCh <- batchHeader
113
+ }
114
+ },
115
+ ),
116
+ )
63
117
64
- err = cmd2 .Start ()
65
- if err != nil {
66
- return nil , nil , err
118
+ if err := iter .p .Start (ctx ); err != nil {
119
+ return nil , err
67
120
}
68
121
69
- errChan := make (chan error , 1 )
70
-
71
- go func () {
72
- defer in2 .Close ()
73
- f1 := bufio .NewReader (out1 )
74
- f2 := bufio .NewWriter (in2 )
75
- defer f2 .Flush ()
76
- for {
77
- line , err := f1 .ReadString ('\n' )
78
- if err != nil {
79
- if err != io .EOF {
80
- errChan <- err
81
- } else {
82
- errChan <- nil
83
- }
84
- return
85
- }
86
- if len (line ) <= 40 {
87
- errChan <- fmt .Errorf ("line too short: %#v" , line )
88
- }
89
- f2 .WriteString (line [:40 ])
90
- f2 .WriteByte ('\n' )
91
- }
92
- }()
93
-
94
- return & ObjectIter {
95
- cmd1 : cmd1 ,
96
- cmd2 : cmd2 ,
97
- out1 : out1 ,
98
- out2 : out2 ,
99
- f : bufio .NewReader (out2 ),
100
- errChan : errChan ,
101
- }, in1 , nil
122
+ return & iter , nil
102
123
}
103
124
104
- // Next returns the next object: its OID, type, and size. When no more
105
- // data are available, it returns an `io.EOF` error.
106
- func (iter * ObjectIter ) Next () (BatchHeader , error ) {
107
- line , err := iter .f .ReadString ('\n' )
108
- if err != nil {
109
- return missingHeader , err
125
+ // AddRoot adds another OID to be included in the walk.
126
+ func (iter * ObjectIter ) AddRoot (oid OID ) error {
127
+ select {
128
+ case iter .oidCh <- oid :
129
+ return nil
130
+ case <- iter .ctx .Done ():
131
+ return iter .ctx .Err ()
110
132
}
111
-
112
- return ParseBatchHeader ("" , line )
113
133
}
114
134
115
135
// Close closes the iterator and frees up resources.
116
- func (iter * ObjectIter ) Close () error {
117
- iter .out1 .Close ()
118
- err := <- iter .errChan
119
- iter .out2 .Close ()
120
- err2 := iter .cmd1 .Wait ()
121
- if err == nil {
122
- err = err2
123
- }
124
- err2 = iter .cmd2 .Wait ()
125
- if err == nil {
126
- err = err2
136
+ func (iter * ObjectIter ) Close () {
137
+ close (iter .oidCh )
138
+ }
139
+
140
+ // Next returns either the next object (its OID, type, and size), or a
141
+ // `false` boolean value to indicate that there are no data left.
142
+ func (iter * ObjectIter ) Next () (BatchHeader , bool , error ) {
143
+ header , ok := <- iter .headerCh
144
+ if ! ok {
145
+ return missingHeader , false , iter .p .Wait ()
127
146
}
128
- return err
147
+ return header , true , nil
129
148
}
0 commit comments