Skip to content

Commit ff072ec

Browse files
authored
Merge pull request #670 from erizocosmico/feature/index-checksum
gitbase: implement table checksums
2 parents 313b043 + f3e1321 commit ff072ec

File tree

268 files changed

+63554
-228
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

268 files changed

+63554
-228
lines changed

Gopkg.lock

Lines changed: 19 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[[constraint]]
22
name = "gopkg.in/src-d/go-mysql-server.v0"
3-
revision = "ac598027ca4498f318051bcb79ca5b4231faf733"
3+
revision = "7afa88202b2490bb93c08603da422a690099695e"
44

55
[[constraint]]
66
name = "github.com/jessevdk/go-flags"

blobs.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var (
2626
)
2727

2828
type blobsTable struct {
29+
checksumable
2930
partitioned
3031
filters []sql.Expression
3132
projection []string
@@ -40,8 +41,8 @@ var BlobsSchema = sql.Schema{
4041
{Name: "blob_content", Type: sql.Blob, Nullable: false, Source: BlobsTableName},
4142
}
4243

43-
func newBlobsTable() *blobsTable {
44-
return new(blobsTable)
44+
func newBlobsTable(pool *RepositoryPool) *blobsTable {
45+
return &blobsTable{checksumable: checksumable{pool}}
4546
}
4647

4748
var _ Table = (*blobsTable)(nil)
@@ -155,13 +156,13 @@ func (*blobsTable) handledColumns() []string {
155156
}
156157

157158
// IndexKeyValues implements the sql.IndexableTable interface.
158-
func (*blobsTable) IndexKeyValues(
159+
func (r *blobsTable) IndexKeyValues(
159160
ctx *sql.Context,
160161
colNames []string,
161162
) (sql.PartitionIndexKeyValueIter, error) {
162163
return newPartitionedIndexKeyValueIter(
163164
ctx,
164-
newBlobsTable(),
165+
newBlobsTable(r.pool),
165166
colNames,
166167
newBlobsKeyValueIter,
167168
)

blobs_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ func TestBlobsTable(t *testing.T) {
1313
ctx, _, cleanup := setup(t)
1414
defer cleanup()
1515

16-
table := getTable(require, BlobsTableName)
16+
table := getTable(t, BlobsTableName, ctx)
1717

1818
rows, err := tableToRows(ctx, table)
1919
require.NoError(err)
@@ -28,7 +28,7 @@ func TestBlobsTable(t *testing.T) {
2828

2929
func TestBlobsLimit(t *testing.T) {
3030
require := require.New(t)
31-
session, _, cleanup := setup(t)
31+
ctx, _, cleanup := setup(t)
3232
defer cleanup()
3333

3434
prev := blobsMaxSize
@@ -37,8 +37,9 @@ func TestBlobsLimit(t *testing.T) {
3737
blobsMaxSize = prev
3838
}()
3939

40-
table := newBlobsTable().WithProjection([]string{"blob_content"})
41-
rows, err := tableToRows(session, table)
40+
table := newBlobsTable(poolFromCtx(t, ctx)).
41+
WithProjection([]string{"blob_content"})
42+
rows, err := tableToRows(ctx, table)
4243
require.NoError(err)
4344

4445
expected := []struct {
@@ -72,7 +73,7 @@ func TestBlobsPushdown(t *testing.T) {
7273
ctx, _, cleanup := setup(t)
7374
defer cleanup()
7475

75-
table := newBlobsTable()
76+
table := newBlobsTable(poolFromCtx(t, ctx))
7677

7778
rows, err := tableToRows(ctx, table)
7879
require.NoError(err)

checksum.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package gitbase
2+
3+
import (
4+
"bytes"
5+
"crypto/sha1"
6+
"encoding/base64"
7+
"io"
8+
9+
git "gopkg.in/src-d/go-git.v4"
10+
"gopkg.in/src-d/go-git.v4/plumbing"
11+
)
12+
13+
// checksumable is embedded by table types to give them a Checksum method
// computed over the repositories of the pool it holds.
type checksumable struct {
14+
// pool is the repository pool whose repositories are hashed by Checksum.
pool *RepositoryPool
15+
}
16+
17+
func (c *checksumable) Checksum() (string, error) {
18+
hash := sha1.New()
19+
for _, id := range c.pool.idOrder {
20+
repo := c.pool.repositories[id]
21+
hash.Write([]byte(id))
22+
23+
bytes, err := readChecksum(repo)
24+
if err != nil {
25+
return "", err
26+
}
27+
28+
if _, err = hash.Write(bytes); err != nil {
29+
return "", err
30+
}
31+
32+
bytes, err = readRefs(repo)
33+
if err != nil {
34+
return "", err
35+
}
36+
37+
if _, err = hash.Write(bytes); err != nil {
38+
return "", err
39+
}
40+
}
41+
42+
return base64.StdEncoding.EncodeToString(hash.Sum(nil)), nil
43+
}
44+
45+
func readChecksum(r repository) ([]byte, error) {
46+
fs, err := r.FS()
47+
if err != nil {
48+
return nil, err
49+
}
50+
51+
dot, packfiles, err := repositoryPackfiles(fs)
52+
if err != nil {
53+
return nil, err
54+
}
55+
56+
var result []byte
57+
for _, p := range packfiles {
58+
f, err := dot.ObjectPack(p)
59+
if err != nil {
60+
return nil, err
61+
}
62+
63+
if _, err = f.Seek(-20, io.SeekEnd); err != nil {
64+
return nil, err
65+
}
66+
67+
var checksum = make([]byte, 20)
68+
if _, err = io.ReadFull(f, checksum); err != nil {
69+
return nil, err
70+
}
71+
72+
if err = f.Close(); err != nil {
73+
return nil, err
74+
}
75+
76+
result = append(result, checksum...)
77+
}
78+
79+
return result, nil
80+
}
81+
82+
func readRefs(r repository) ([]byte, error) {
83+
repo, err := r.Repo()
84+
if err != nil {
85+
if err == git.ErrRepositoryNotExists {
86+
return nil, nil
87+
}
88+
return nil, err
89+
}
90+
91+
buf := bytes.NewBuffer(nil)
92+
93+
head, err := repo.Head()
94+
if err != nil && err != plumbing.ErrReferenceNotFound {
95+
return nil, err
96+
} else {
97+
buf.WriteString("HEAD")
98+
buf.WriteString(head.Hash().String())
99+
}
100+
101+
refs, err := repo.References()
102+
if err != nil {
103+
return nil, err
104+
}
105+
106+
err = refs.ForEach(func(r *plumbing.Reference) error {
107+
buf.WriteString(string(r.Name()))
108+
buf.WriteString(r.Hash().String())
109+
return nil
110+
})
111+
if err != nil {
112+
return nil, err
113+
}
114+
115+
return buf.Bytes(), nil
116+
}

checksum_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package gitbase
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
fixtures "gopkg.in/src-d/go-git-fixtures.v3"
9+
"gopkg.in/src-d/go-git.v4/plumbing/cache"
10+
)
11+
12+
func TestChecksum(t *testing.T) {
13+
require := require.New(t)
14+
15+
require.NoError(fixtures.Init())
16+
defer func() {
17+
require.NoError(fixtures.Clean())
18+
}()
19+
20+
pool := NewRepositoryPool(cache.DefaultMaxSize)
21+
22+
for i, f := range fixtures.ByTag("worktree") {
23+
path := f.Worktree().Root()
24+
require.NoError(pool.AddGitWithID(fmt.Sprintf("repo_%d", i), path))
25+
}
26+
27+
c := &checksumable{pool}
28+
checksum, err := c.Checksum()
29+
require.NoError(err)
30+
require.Equal("ogfv7HAwFigDgtuW4tbnEP+Zl40=", checksum)
31+
32+
pool = NewRepositoryPool(cache.DefaultMaxSize)
33+
path := fixtures.ByTag("worktree").One().Worktree().Root()
34+
require.NoError(pool.AddGitWithID("worktree", path))
35+
36+
c = &checksumable{pool}
37+
checksum, err = c.Checksum()
38+
require.NoError(err)
39+
require.Equal("5kfLCygyBSZFMh+nFzFNk3zAUTQ=", checksum)
40+
}

cmd/gitbase/command/server.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ func (c *Server) buildDatabase() error {
204204
return err
205205
}
206206

207-
c.engine.AddDatabase(gitbase.NewDatabase(c.Name))
207+
c.engine.AddDatabase(gitbase.NewDatabase(c.Name, c.pool))
208208
c.engine.AddDatabase(sql.NewInformationSchemaDatabase(c.engine.Catalog))
209209
c.engine.Catalog.SetCurrentDatabase(c.Name)
210210
logrus.WithField("db", c.Name).Debug("registered database to catalog")

commit_blobs.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
)
1111

1212
type commitBlobsTable struct {
13+
checksumable
1314
partitioned
1415
filters []sql.Expression
1516
index sql.IndexLookup
@@ -24,8 +25,8 @@ var CommitBlobsSchema = sql.Schema{
2425

2526
var _ Table = (*commitBlobsTable)(nil)
2627

27-
func newCommitBlobsTable() Indexable {
28-
return new(commitBlobsTable)
28+
func newCommitBlobsTable(pool *RepositoryPool) Indexable {
29+
return &commitBlobsTable{checksumable: checksumable{pool}}
2930
}
3031

3132
var _ Squashable = (*blobsTable)(nil)
@@ -120,13 +121,13 @@ func (commitBlobsTable) HandledFilters(filters []sql.Expression) []sql.Expressio
120121
}
121122

122123
// IndexKeyValues implements the sql.IndexableTable interface.
123-
func (*commitBlobsTable) IndexKeyValues(
124+
func (t *commitBlobsTable) IndexKeyValues(
124125
ctx *sql.Context,
125126
colNames []string,
126127
) (sql.PartitionIndexKeyValueIter, error) {
127128
return newTablePartitionIndexKeyValueIter(
128129
ctx,
129-
newCommitBlobsTable(),
130+
newCommitBlobsTable(t.pool),
130131
CommitBlobsTableName,
131132
colNames,
132133
new(commitBlobsRowKeyMapper),

commit_blobs_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ import (
1414
func TestCommitBlobsTableRowIter(t *testing.T) {
1515
require := require.New(t)
1616

17-
table := newCommitBlobsTable()
18-
require.NotNil(table)
19-
2017
ctx, paths, cleanup := setupRepos(t)
2118
defer cleanup()
2219

20+
table := newCommitBlobsTable(poolFromCtx(t, ctx))
21+
require.NotNil(table)
22+
2323
expectedRows := []sql.Row{
2424
sql.NewRow(paths[0], "e8d3ffab552895c19b9fcf7aa264d277cde33881", "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88"),
2525
sql.NewRow(paths[0], "e8d3ffab552895c19b9fcf7aa264d277cde33881", "d3ff53e0564a9f87d8e84b6e28e5060e517008aa"),

commit_files.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
)
1515

1616
type commitFilesTable struct {
17+
checksumable
1718
partitioned
1819
filters []sql.Expression
1920
index sql.IndexLookup
@@ -28,8 +29,8 @@ var CommitFilesSchema = sql.Schema{
2829
{Name: "tree_hash", Type: sql.Text, Source: CommitFilesTableName},
2930
}
3031

31-
func newCommitFilesTable() Indexable {
32-
return new(commitFilesTable)
32+
func newCommitFilesTable(pool *RepositoryPool) Indexable {
33+
return &commitFilesTable{checksumable: checksumable{pool}}
3334
}
3435

3536
var _ Table = (*commitFilesTable)(nil)
@@ -127,13 +128,13 @@ func (commitFilesTable) handledColumns() []string {
127128
}
128129

129130
// IndexKeyValues implements the sql.IndexableTable interface.
130-
func (*commitFilesTable) IndexKeyValues(
131+
func (t *commitFilesTable) IndexKeyValues(
131132
ctx *sql.Context,
132133
colNames []string,
133134
) (sql.PartitionIndexKeyValueIter, error) {
134135
return newPartitionedIndexKeyValueIter(
135136
ctx,
136-
newCommitFilesTable(),
137+
newCommitFilesTable(t.pool),
137138
colNames,
138139
newCommitFilesKeyValueIter,
139140
)

0 commit comments

Comments
 (0)