Skip to content

Commit 275abfa

Browse files
authored
Merge pull request #23 from go-git/verify-pack
2 parents b38fe28 + 94e409d commit 275abfa

File tree

2 files changed

+364
-0
lines changed

2 files changed

+364
-0
lines changed

cmd/gogit/verify-pack.go

Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
package main
2+
3+
import (
4+
"crypto"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"log/slog"
9+
"os"
10+
"sort"
11+
"strings"
12+
13+
"github.com/go-git/go-billy/v6"
14+
fixtures "github.com/go-git/go-git-fixtures/v5"
15+
"github.com/go-git/go-git/v6/plumbing"
16+
"github.com/go-git/go-git/v6/plumbing/format/idxfile"
17+
"github.com/go-git/go-git/v6/plumbing/format/packfile"
18+
"github.com/spf13/cobra"
19+
)
20+
21+
// Command-line switches for the verify-pack subcommand. They are bound to
// their flags in init and all default to false.
var (
	// verifyPackVerbose is bound to --verbose / -v.
	verifyPackVerbose bool

	// verifyPackSHA256 is bound to --sha256 and selects SHA-256 instead of
	// SHA-1 as the object hash of the pack.
	verifyPackSHA256 bool

	// verifyPackFixtureUrl is bound to --fixture-url: the <file> argument is
	// looked up as a go-git-fixtures URL instead of a file path.
	verifyPackFixtureUrl bool

	// verifyPackFixtureTag is bound to --fixture-tag: the <file> argument is
	// looked up as a go-git-fixtures tag instead of a file path.
	verifyPackFixtureTag bool
)
27+
28+
func init() {
29+
verifyPackCmd.Flags().BoolVarP(&verifyPackVerbose, "verbose", "v", false, "Show detailed object information")
30+
verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureUrl, "fixture-url", "", false, "Use <file> as go-git-fixture url")
31+
verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureTag, "fixture-tag", "", false, "Use <file> as go-git-fixture tag")
32+
verifyPackCmd.Flags().BoolVarP(&verifyPackSHA256, "sha256", "", false, "Treat the pack file as sha256")
33+
rootCmd.AddCommand(verifyPackCmd)
34+
}
35+
36+
var verifyPackCmd = &cobra.Command{
37+
Use: "verify-pack [-v] <file>",
38+
Short: "Validate packed Git archive files",
39+
Args: cobra.ExactArgs(1),
40+
RunE: func(cmd *cobra.Command, args []string) error {
41+
return verifyPack(args[0], verifyPackVerbose)
42+
},
43+
DisableFlagsInUseLine: true,
44+
}
45+
46+
type objectInfo struct {
47+
hash plumbing.Hash
48+
typ plumbing.ObjectType
49+
diskType plumbing.ObjectType
50+
size int64
51+
packedSize int64
52+
offset int64
53+
depth int
54+
base plumbing.Hash
55+
}
56+
57+
func verifyPack(path string, verbose bool) error {
58+
idxFile, packFile, err := openPack(path)
59+
if err != nil {
60+
return err
61+
}
62+
63+
defer func() {
64+
err = idxFile.Close()
65+
if err != nil {
66+
slog.Debug("failed to close idx file", "error", err)
67+
}
68+
}()
69+
70+
defer func() {
71+
err = packFile.Close()
72+
if err != nil {
73+
slog.Debug("failed to close pack file", "error", err)
74+
}
75+
}()
76+
77+
ch := crypto.SHA1
78+
if verifyPackSHA256 {
79+
ch = crypto.SHA256
80+
}
81+
82+
idx := idxfile.NewMemoryIndex(ch.Size())
83+
84+
dec := idxfile.NewDecoder(idxFile)
85+
if err := dec.Decode(idx); err != nil {
86+
return fmt.Errorf("failed to decode index file: %w", err)
87+
}
88+
89+
pf := packfile.NewPackfile(
90+
packFile,
91+
packfile.WithIdx(idx),
92+
packfile.WithObjectIDSize(ch.Size()),
93+
)
94+
95+
defer func() {
96+
err := pf.Close()
97+
if err != nil {
98+
slog.Debug("failed to close Packfile object", "error", err)
99+
}
100+
}()
101+
102+
scanner, err := pf.Scanner()
103+
if err != nil {
104+
return fmt.Errorf("failed to get scanner: %w", err)
105+
}
106+
107+
entries, err := idx.EntriesByOffset()
108+
if err != nil {
109+
return fmt.Errorf("failed to get entries: %w", err)
110+
}
111+
112+
var objects []objectInfo
113+
114+
for {
115+
entry, err := entries.Next()
116+
if errors.Is(err, io.EOF) {
117+
break
118+
}
119+
120+
if err != nil {
121+
return fmt.Errorf("failed to read entry: %w", err)
122+
}
123+
124+
// Read raw object header to get delta information.
125+
err = scanner.SeekFromStart(int64(entry.Offset))
126+
if err != nil {
127+
return fmt.Errorf("failed to seek to offset %d: %w", entry.Offset, err)
128+
}
129+
130+
if !scanner.Scan() {
131+
return fmt.Errorf("failed to scan object at offset %d", entry.Offset)
132+
}
133+
134+
header := scanner.Data().Value().(packfile.ObjectHeader)
135+
136+
// For delta objects, Size is the delta size.
137+
// For regular objects, Size is the inflated size.
138+
info := objectInfo{
139+
hash: entry.Hash,
140+
diskType: header.Type,
141+
size: header.Size,
142+
offset: int64(entry.Offset),
143+
}
144+
145+
// Calculate packed size (distance to next header or end of file).
146+
if len(objects) > 0 {
147+
objects[len(objects)-1].packedSize = info.offset - objects[len(objects)-1].offset
148+
}
149+
150+
objects = append(objects, info)
151+
}
152+
153+
// Calculate the packed size of the last object.
154+
if len(objects) > 0 {
155+
stat, err := packFile.Stat()
156+
if err != nil {
157+
return fmt.Errorf("failed to stat pack file: %w", err)
158+
}
159+
// Pack file ends with a checksum (20-byte SHA-1 or 32-byte SHA-256).
160+
objects[len(objects)-1].packedSize = stat.Size() - objects[len(objects)-1].offset - int64(ch.Size())
161+
}
162+
163+
// Resolve actual types for all objects (after delta application).
164+
for i := range objects {
165+
obj, err := pf.GetByOffset(objects[i].offset)
166+
if err != nil {
167+
return fmt.Errorf("failed to get object at offset %d: %w", objects[i].offset, err)
168+
}
169+
170+
objects[i].typ = obj.Type()
171+
}
172+
173+
// Build delta chain information.
174+
deltaChains := make(map[plumbing.Hash]int)
175+
objectByHash := make(map[plumbing.Hash]*objectInfo)
176+
objectByOffset := make(map[int64]*objectInfo)
177+
178+
for i := range objects {
179+
objectByHash[objects[i].hash] = &objects[i]
180+
objectByOffset[objects[i].offset] = &objects[i]
181+
}
182+
183+
// Calculate delta chains by reading headers again.
184+
for i := range objects {
185+
if !objects[i].diskType.IsDelta() {
186+
continue
187+
}
188+
189+
err := scanner.SeekFromStart(objects[i].offset)
190+
if err != nil {
191+
return fmt.Errorf("failed to seek to offset %d: %w", objects[i].offset, err)
192+
}
193+
194+
if !scanner.Scan() {
195+
return fmt.Errorf("failed to scan object at offset %d", objects[i].offset)
196+
}
197+
198+
header := scanner.Data().Value().(packfile.ObjectHeader)
199+
200+
// Calculate delta chain depth.
201+
depth := 1
202+
203+
var baseHash plumbing.Hash
204+
205+
switch header.Type {
206+
case plumbing.REFDeltaObject:
207+
baseHash = header.Reference
208+
case plumbing.OFSDeltaObject:
209+
// OffsetReference is the absolute offset of the base object.
210+
if baseObj, ok := objectByOffset[header.OffsetReference]; ok {
211+
baseHash = baseObj.hash
212+
}
213+
}
214+
215+
// Follow the chain to calculate total depth.
216+
if !baseHash.IsZero() {
217+
current := baseHash
218+
for {
219+
baseObj, ok := objectByHash[current]
220+
if !ok {
221+
break
222+
}
223+
224+
if !baseObj.diskType.IsDelta() {
225+
// Reached a non-delta base.
226+
break
227+
}
228+
229+
// Get the base object's header.
230+
err := scanner.SeekFromStart(baseObj.offset)
231+
if err != nil {
232+
break
233+
}
234+
235+
if !scanner.Scan() {
236+
break
237+
}
238+
239+
baseHeader := scanner.Data().Value().(packfile.ObjectHeader)
240+
241+
depth++
242+
243+
if baseHeader.Type == plumbing.REFDeltaObject {
244+
current = baseHeader.Reference
245+
} else if baseHeader.Type == plumbing.OFSDeltaObject {
246+
// OffsetReference is the absolute offset.
247+
if nextBase, ok := objectByOffset[baseHeader.OffsetReference]; ok {
248+
current = nextBase.hash
249+
} else {
250+
break
251+
}
252+
} else {
253+
break
254+
}
255+
}
256+
}
257+
258+
objects[i].depth = depth
259+
objects[i].base = baseHash
260+
deltaChains[objects[i].hash] = depth
261+
}
262+
263+
if verbose {
264+
for _, obj := range objects {
265+
// Format type with padding to match git's output.
266+
typeStr := obj.typ.String()
267+
if len(typeStr) == 4 {
268+
typeStr = typeStr + " "
269+
} else {
270+
typeStr = typeStr + " "
271+
}
272+
273+
fmt.Printf("%s %s%d %d %d",
274+
obj.hash.String(),
275+
typeStr,
276+
obj.size,
277+
obj.packedSize,
278+
obj.offset,
279+
)
280+
281+
if obj.diskType.IsDelta() && !obj.base.IsZero() {
282+
fmt.Printf(" %d %s", obj.depth, obj.base.String())
283+
}
284+
285+
fmt.Println()
286+
}
287+
288+
// Print statistics.
289+
nonDelta := len(objects) - len(deltaChains)
290+
fmt.Printf("non delta: %d objects\n", nonDelta)
291+
292+
// Count chain lengths.
293+
chainLengths := make(map[int]int)
294+
for _, depth := range deltaChains {
295+
chainLengths[depth]++
296+
}
297+
298+
// Sort chain lengths for consistent output.
299+
var lengths []int
300+
for length := range chainLengths {
301+
lengths = append(lengths, length)
302+
}
303+
304+
sort.Ints(lengths)
305+
306+
for _, length := range lengths {
307+
count := chainLengths[length]
308+
309+
objWord := "objects"
310+
if count == 1 {
311+
objWord = "object"
312+
}
313+
314+
fmt.Printf("chain length = %d: %d %s\n", length, count, objWord)
315+
}
316+
}
317+
318+
fmt.Printf("%s: ok\n", path)
319+
320+
return nil
321+
}
322+
323+
func openPack(path string) (billy.File, billy.File, error) {
324+
if verifyPackFixtureUrl || verifyPackFixtureTag {
325+
var f fixtures.Fixtures
326+
if verifyPackFixtureUrl {
327+
f = fixtures.ByURL(path)
328+
}
329+
if verifyPackFixtureTag {
330+
f = fixtures.ByTag(path)
331+
}
332+
333+
if len(f) == 0 {
334+
return nil, nil, fmt.Errorf("no fixture found for %q", path)
335+
}
336+
337+
fixture := f.One()
338+
return fixture.Idx(), fixture.Packfile(), nil
339+
}
340+
341+
idxPath := path
342+
packPath := path
343+
344+
if before, ok := strings.CutSuffix(path, ".idx"); ok {
345+
packPath = before + ".pack"
346+
} else if before, ok := strings.CutSuffix(path, ".pack"); ok {
347+
idxPath = before + ".idx"
348+
} else {
349+
return nil, nil, errors.New("file must have .idx or .pack extension")
350+
}
351+
352+
idxFile, err := os.Open(idxPath)
353+
if err != nil {
354+
return nil, nil, fmt.Errorf("failed to open index file: %w", err)
355+
}
356+
357+
packFile, err := os.Open(packPath)
358+
if err != nil {
359+
return nil, nil, fmt.Errorf("failed to open pack file: %w", err)
360+
}
361+
362+
return idxFile, packFile, nil
363+
}

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ toolchain go1.25.4
66

77
require (
88
github.com/go-git/go-billy/v6 v6.0.0-20251120215217-80673c4ccbfb
9+
github.com/go-git/go-git-fixtures/v5 v5.1.1
910
github.com/go-git/go-git/v6 v6.0.0-20251123162143-36fa81975a20
1011
github.com/spf13/cobra v1.10.1
1112
golang.org/x/crypto v0.45.0

0 commit comments

Comments
 (0)