Skip to content

Commit b43d62d

Browse files
authored
Merge pull request #91 from github/use-pipe-package
Use the new `pipe` package for running external commands
2 parents bb80a01 + 7dd1823 commit b43d62d

File tree

13 files changed

+912
-758
lines changed

13 files changed

+912
-758
lines changed

git/batch_header.go

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
package git
2+
3+
import (
4+
"fmt"
5+
"strconv"
6+
"strings"
7+
8+
"github.com/github/git-sizer/counts"
9+
)
10+
11+
type BatchHeader struct {
12+
OID OID
13+
ObjectType ObjectType
14+
ObjectSize counts.Count32
15+
}
16+
17+
var missingHeader = BatchHeader{
18+
ObjectType: "missing",
19+
}
20+
21+
// Parse a `cat-file --batch[-check]` output header line (including
22+
// the trailing LF). `spec`, if not "", is used in error messages.
23+
func ParseBatchHeader(spec string, header string) (BatchHeader, error) {
24+
header = header[:len(header)-1]
25+
words := strings.Split(header, " ")
26+
if words[len(words)-1] == "missing" {
27+
if spec == "" {
28+
spec = words[0]
29+
}
30+
return missingHeader, fmt.Errorf("missing object %s", spec)
31+
}
32+
33+
oid, err := NewOID(words[0])
34+
if err != nil {
35+
return missingHeader, err
36+
}
37+
38+
size, err := strconv.ParseUint(words[2], 10, 0)
39+
if err != nil {
40+
return missingHeader, err
41+
}
42+
return BatchHeader{
43+
OID: oid,
44+
ObjectType: ObjectType(words[1]),
45+
ObjectSize: counts.NewCount32(size),
46+
}, nil
47+
}

git/batch_obj_iter.go

Lines changed: 156 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,156 @@
1+
package git
2+
3+
import (
4+
"bufio"
5+
"context"
6+
"fmt"
7+
"io"
8+
9+
"github.com/github/git-sizer/internal/pipe"
10+
)
11+
12+
type ObjectRecord struct {
13+
BatchHeader
14+
Data []byte
15+
}
16+
17+
// BatchObjectIter iterates over objects whose names are fed into its
18+
// stdin. The output is buffered, so it has to be closed before you
19+
// can be sure that you have gotten all of the objects.
20+
type BatchObjectIter struct {
21+
ctx context.Context
22+
p *pipe.Pipeline
23+
oidCh chan OID
24+
objCh chan ObjectRecord
25+
errCh chan error
26+
}
27+
28+
// NewBatchObjectIter returns a `*BatchObjectIterator` and an
29+
// `io.WriteCloser`. The iterator iterates over objects whose names
30+
// are fed into the `io.WriteCloser`, one per line. The
31+
// `io.WriteCloser` should normally be closed and the iterator's
32+
// output drained before `Close()` is called.
33+
func (repo *Repository) NewBatchObjectIter(ctx context.Context) (*BatchObjectIter, error) {
34+
iter := BatchObjectIter{
35+
ctx: ctx,
36+
p: pipe.New(),
37+
oidCh: make(chan OID),
38+
objCh: make(chan ObjectRecord),
39+
errCh: make(chan error),
40+
}
41+
42+
iter.p.Add(
43+
// Read OIDs from `iter.oidCh` and write them to `git
44+
// cat-file`:
45+
pipe.Function(
46+
"request-objects",
47+
func(ctx context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error {
48+
out := bufio.NewWriter(stdout)
49+
50+
for {
51+
select {
52+
case oid, ok := <-iter.oidCh:
53+
if !ok {
54+
return out.Flush()
55+
}
56+
if _, err := fmt.Fprintln(out, oid.String()); err != nil {
57+
return fmt.Errorf("writing to 'git cat-file': %w", err)
58+
}
59+
case <-ctx.Done():
60+
return ctx.Err()
61+
}
62+
}
63+
},
64+
),
65+
66+
// Read OIDs from `stdin` and output a header line followed by
67+
// the contents of the corresponding Git objects:
68+
pipe.CommandStage(
69+
"git-cat-file",
70+
repo.GitCommand("cat-file", "--batch", "--buffer"),
71+
),
72+
73+
// Parse the object headers and read the object contents, and
74+
// shove both into `objCh`:
75+
pipe.Function(
76+
"object-reader",
77+
func(ctx context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error {
78+
defer close(iter.objCh)
79+
80+
f := bufio.NewReader(stdin)
81+
82+
for {
83+
header, err := f.ReadString('\n')
84+
if err != nil {
85+
if err == io.EOF {
86+
return nil
87+
}
88+
return fmt.Errorf("reading from 'git cat-file': %w", err)
89+
}
90+
batchHeader, err := ParseBatchHeader("", header)
91+
if err != nil {
92+
return fmt.Errorf("parsing output of 'git cat-file': %w", err)
93+
}
94+
95+
// Read the object contents plus the trailing LF
96+
// (which is discarded below while creating the
97+
// `ObjectRecord`):
98+
data := make([]byte, batchHeader.ObjectSize+1)
99+
if _, err := io.ReadFull(f, data); err != nil {
100+
return fmt.Errorf(
101+
"reading object data from 'git cat-file' for %s '%s': %w",
102+
batchHeader.ObjectType, batchHeader.OID, err,
103+
)
104+
}
105+
106+
select {
107+
case iter.objCh <- ObjectRecord{
108+
BatchHeader: batchHeader,
109+
Data: data[:batchHeader.ObjectSize],
110+
}:
111+
case <-iter.ctx.Done():
112+
return iter.ctx.Err()
113+
}
114+
}
115+
},
116+
),
117+
)
118+
119+
if err := iter.p.Start(ctx); err != nil {
120+
return nil, err
121+
}
122+
123+
return &iter, nil
124+
}
125+
126+
// RequestObject requests that the object with the specified `oid` be
127+
// processed. The objects registered via this method can be read using
128+
// `Next()` in the order that they were requested.
129+
func (iter *BatchObjectIter) RequestObject(oid OID) error {
130+
select {
131+
case iter.oidCh <- oid:
132+
return nil
133+
case <-iter.ctx.Done():
134+
return iter.ctx.Err()
135+
}
136+
}
137+
138+
// Close closes the iterator and frees up resources. Close must be
139+
// called exactly once.
140+
func (iter *BatchObjectIter) Close() {
141+
close(iter.oidCh)
142+
}
143+
144+
// Next either returns the next object (its header and contents), or a
145+
// `false` boolean value if no more objects are left. Objects need to
146+
// be read asynchronously, but the last objects won't necessarily show
147+
// up here until `Close()` has been called.
148+
func (iter *BatchObjectIter) Next() (ObjectRecord, bool, error) {
149+
obj, ok := <-iter.objCh
150+
if !ok {
151+
return ObjectRecord{
152+
BatchHeader: missingHeader,
153+
}, false, iter.p.Wait()
154+
}
155+
return obj, true, nil
156+
}

git/commit.go

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
package git
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/github/git-sizer/counts"
7+
)
8+
9+
// Commit represents the parts of a commit object that we need.
10+
type Commit struct {
11+
Size counts.Count32
12+
Parents []OID
13+
Tree OID
14+
}
15+
16+
// ParseCommit parses the commit object whose contents are in `data`.
17+
// `oid` is used only in error messages.
18+
func ParseCommit(oid OID, data []byte) (*Commit, error) {
19+
var parents []OID
20+
var tree OID
21+
var treeFound bool
22+
iter, err := NewObjectHeaderIter(oid.String(), data)
23+
if err != nil {
24+
return nil, err
25+
}
26+
for iter.HasNext() {
27+
key, value, err := iter.Next()
28+
if err != nil {
29+
return nil, err
30+
}
31+
switch key {
32+
case "parent":
33+
parent, err := NewOID(value)
34+
if err != nil {
35+
return nil, fmt.Errorf("malformed parent header in commit %s", oid)
36+
}
37+
parents = append(parents, parent)
38+
case "tree":
39+
if treeFound {
40+
return nil, fmt.Errorf("multiple trees found in commit %s", oid)
41+
}
42+
tree, err = NewOID(value)
43+
if err != nil {
44+
return nil, fmt.Errorf("malformed tree header in commit %s", oid)
45+
}
46+
treeFound = true
47+
}
48+
}
49+
if !treeFound {
50+
return nil, fmt.Errorf("no tree found in commit %s", oid)
51+
}
52+
return &Commit{
53+
Size: counts.NewCount32(uint64(len(data))),
54+
Parents: parents,
55+
Tree: tree,
56+
}, nil
57+
}

0 commit comments

Comments
 (0)