@@ -2,80 +2,155 @@ package git
2
2
3
3
import (
4
4
"bufio"
5
+ "context"
6
+ "fmt"
5
7
"io"
6
- "os"
7
- "os/exec "
8
+
9
+ "github.com/github/git-sizer/internal/pipe "
8
10
)
9
11
12
+ type ObjectRecord struct {
13
+ BatchHeader
14
+ Data []byte
15
+ }
16
+
10
17
// BatchObjectIter iterates over objects whose names are fed into its
11
18
// stdin. The output is buffered, so it has to be closed before you
12
19
// can be sure that you have gotten all of the objects.
13
20
type BatchObjectIter struct {
14
- cmd * exec.Cmd
15
- out io.ReadCloser
16
- f * bufio.Reader
21
+ ctx context.Context
22
+ p * pipe.Pipeline
23
+ oidCh chan OID
24
+ objCh chan ObjectRecord
25
+ errCh chan error
17
26
}
18
27
19
28
// NewBatchObjectIter returns a `*BatchObjectIterator` and an
20
29
// `io.WriteCloser`. The iterator iterates over objects whose names
21
30
// are fed into the `io.WriteCloser`, one per line. The
22
31
// `io.WriteCloser` should normally be closed and the iterator's
23
32
// output drained before `Close()` is called.
24
- func (repo * Repository ) NewBatchObjectIter () (* BatchObjectIter , io.WriteCloser , error ) {
25
- cmd := repo .GitCommand ("cat-file" , "--batch" , "--buffer" )
26
-
27
- in , err := cmd .StdinPipe ()
28
- if err != nil {
29
- return nil , nil , err
33
+ func (repo * Repository ) NewBatchObjectIter (ctx context.Context ) (* BatchObjectIter , error ) {
34
+ iter := BatchObjectIter {
35
+ ctx : ctx ,
36
+ p : pipe .New (),
37
+ oidCh : make (chan OID ),
38
+ objCh : make (chan ObjectRecord ),
39
+ errCh : make (chan error ),
30
40
}
31
41
32
- out , err := cmd .StdoutPipe ()
33
- if err != nil {
34
- return nil , nil , err
35
- }
42
+ iter .p .Add (
43
+ // Read OIDs from `iter.oidCh` and write them to `git
44
+ // cat-file`:
45
+ pipe .Function (
46
+ "request-objects" ,
47
+ func (ctx context.Context , _ pipe.Env , _ io.Reader , stdout io.Writer ) error {
48
+ out := bufio .NewWriter (stdout )
49
+
50
+ for {
51
+ select {
52
+ case oid , ok := <- iter .oidCh :
53
+ if ! ok {
54
+ return out .Flush ()
55
+ }
56
+ if _ , err := fmt .Fprintln (out , oid .String ()); err != nil {
57
+ return fmt .Errorf ("writing to 'git cat-file': %w" , err )
58
+ }
59
+ case <- ctx .Done ():
60
+ return ctx .Err ()
61
+ }
62
+ }
63
+ },
64
+ ),
65
+
66
+ // Read OIDs from `stdin` and output a header line followed by
67
+ // the contents of the corresponding Git objects:
68
+ pipe .CommandStage (
69
+ "git-cat-file" ,
70
+ repo .GitCommand ("cat-file" , "--batch" , "--buffer" ),
71
+ ),
72
+
73
+ // Parse the object headers and read the object contents, and
74
+ // shove both into `objCh`:
75
+ pipe .Function (
76
+ "object-reader" ,
77
+ func (ctx context.Context , _ pipe.Env , stdin io.Reader , _ io.Writer ) error {
78
+ defer close (iter .objCh )
36
79
37
- cmd . Stderr = os . Stderr
80
+ f := bufio . NewReader ( stdin )
38
81
39
- err = cmd .Start ()
40
- if err != nil {
41
- return nil , nil , err
82
+ for {
83
+ header , err := f .ReadString ('\n' )
84
+ if err != nil {
85
+ if err == io .EOF {
86
+ return nil
87
+ }
88
+ return fmt .Errorf ("reading from 'git cat-file': %w" , err )
89
+ }
90
+ batchHeader , err := ParseBatchHeader ("" , header )
91
+ if err != nil {
92
+ return fmt .Errorf ("parsing output of 'git cat-file': %w" , err )
93
+ }
94
+
95
+ // Read the object contents plus the trailing LF
96
+ // (which is discarded below while creating the
97
+ // `ObjectRecord`):
98
+ data := make ([]byte , batchHeader .ObjectSize + 1 )
99
+ if _ , err := io .ReadFull (f , data ); err != nil {
100
+ return fmt .Errorf (
101
+ "reading object data from 'git cat-file' for %s '%s': %w" ,
102
+ batchHeader .ObjectType , batchHeader .OID , err ,
103
+ )
104
+ }
105
+
106
+ select {
107
+ case iter .objCh <- ObjectRecord {
108
+ BatchHeader : batchHeader ,
109
+ Data : data [:batchHeader .ObjectSize ],
110
+ }:
111
+ case <- iter .ctx .Done ():
112
+ return iter .ctx .Err ()
113
+ }
114
+ }
115
+ },
116
+ ),
117
+ )
118
+
119
+ if err := iter .p .Start (ctx ); err != nil {
120
+ return nil , err
42
121
}
43
122
44
- return & BatchObjectIter {
45
- cmd : cmd ,
46
- out : out ,
47
- f : bufio .NewReader (out ),
48
- }, in , nil
123
+ return & iter , nil
49
124
}
50
125
51
- // Next returns the next object: its OID, type, size, and contents.
52
- // When no more data are available, it returns an `io.EOF` error.
53
- func (iter * BatchObjectIter ) Next () (BatchHeader , []byte , error ) {
54
- header , err := iter .f .ReadString ('\n' )
55
- if err != nil {
56
- return missingHeader , nil , err
57
- }
58
- obj , err := ParseBatchHeader ("" , header )
59
- if err != nil {
60
- return missingHeader , nil , err
126
+ // RequestObject requests that the object with the specified `oid` be
127
+ // processed. The objects registered via this method can be read using
128
+ // `Next()` in the order that they were requested.
129
+ func (iter * BatchObjectIter ) RequestObject (oid OID ) error {
130
+ select {
131
+ case iter .oidCh <- oid :
132
+ return nil
133
+ case <- iter .ctx .Done ():
134
+ return iter .ctx .Err ()
61
135
}
62
- // +1 for LF:
63
- data := make ([]byte , obj .ObjectSize + 1 )
64
- _ , err = io .ReadFull (iter .f , data )
65
- if err != nil {
66
- return missingHeader , nil , err
67
- }
68
- data = data [:len (data )- 1 ]
69
- return obj , data , nil
70
136
}
71
137
72
- // Close closes the iterator and frees up resources. If any iterator
73
- // output hasn't been read yet, it will be lost.
74
- func (iter * BatchObjectIter ) Close () error {
75
- err := iter .out .Close ()
76
- err2 := iter .cmd .Wait ()
77
- if err == nil {
78
- err = err2
138
+ // Close closes the iterator and frees up resources. Close must be
139
+ // called exactly once.
140
+ func (iter * BatchObjectIter ) Close () {
141
+ close (iter .oidCh )
142
+ }
143
+
144
+ // Next either returns the next object (its header and contents), or a
145
+ // `false` boolean value if no more objects are left. Objects need to
146
+ // be read asynchronously, but the last objects won't necessarily show
147
+ // up here until `Close()` has been called.
148
+ func (iter * BatchObjectIter ) Next () (ObjectRecord , bool , error ) {
149
+ obj , ok := <- iter .objCh
150
+ if ! ok {
151
+ return ObjectRecord {
152
+ BatchHeader : missingHeader ,
153
+ }, false , iter .p .Wait ()
79
154
}
80
- return err
155
+ return obj , true , nil
81
156
}
0 commit comments