Skip to content

Commit eaaba90

Browse files
authored
Merge pull request #276 from erizocosmico/feature/files-table
gitbase: implement files table with pushdown
2 parents 5ec5e43 + 2e83214 commit eaaba90

File tree

10 files changed

+446
-25
lines changed

10 files changed

+446
-25
lines changed

blobs.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func (i *blobsByHashIter) Close() error {
192192
return nil
193193
}
194194

195-
func blobToRow(repoID string, c *object.Blob, readContent bool) (sql.Row, error) {
195+
func blobContent(c *object.Blob, readContent bool) ([]byte, error) {
196196
var content []byte
197197
var isAllowed = blobsAllowBinary
198198
if !isAllowed && readContent {
@@ -215,6 +215,15 @@ func blobToRow(repoID string, c *object.Blob, readContent bool) (sql.Row, error)
215215
}
216216
}
217217

218+
return content, nil
219+
}
220+
221+
func blobToRow(repoID string, c *object.Blob, readContent bool) (sql.Row, error) {
222+
content, err := blobContent(c, readContent)
223+
if err != nil {
224+
return nil, err
225+
}
226+
218227
return sql.NewRow(
219228
repoID,
220229
c.Hash.String(),
@@ -263,7 +272,7 @@ func shouldReadContent(columns []sql.Expression) bool {
263272
var found bool
264273
expression.Inspect(e, func(e sql.Expression) bool {
265274
gf, ok := e.(*expression.GetField)
266-
found = ok && gf.Table() == BlobsTableName && gf.Name() == "blob_content"
275+
found = ok && gf.Name() == "blob_content"
267276
return !found
268277
})
269278

database.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ const (
2323
CommitTreesTableName = "commit_trees"
2424
// CommitBlobsTableName is the name of the commit blobs table.
2525
CommitBlobsTableName = "commit_blobs"
26+
// FilesTableName is the name of the files table.
27+
FilesTableName = "files"
2628
)
2729

2830
// Database holds all git repository tables
@@ -37,6 +39,7 @@ type Database struct {
3739
refCommits sql.Table
3840
commitTrees sql.Table
3941
commitBlobs sql.Table
42+
files sql.Table
4043
}
4144

4245
// NewDatabase creates a new Database structure and initializes its
@@ -53,6 +56,7 @@ func NewDatabase(name string) sql.Database {
5356
refCommits: newRefCommitsTable(),
5457
commitTrees: newCommitTreesTable(),
5558
commitBlobs: newCommitBlobsTable(),
59+
files: newFilesTable(),
5660
}
5761
}
5862

@@ -73,5 +77,6 @@ func (d *Database) Tables() map[string]sql.Table {
7377
RefCommitsTableName: d.refCommits,
7478
CommitTreesTableName: d.commitTrees,
7579
CommitBlobsTableName: d.commitBlobs,
80+
FilesTableName: d.files,
7681
}
7782
}

database_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ func TestDatabase_Tables(t *testing.T) {
4040
RepositoriesTableName,
4141
RemotesTableName,
4242
CommitBlobsTableName,
43+
FilesTableName,
4344
}
4445
sort.Strings(expected)
4546

files.go

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
package gitbase
2+
3+
import (
4+
"io"
5+
6+
"gopkg.in/src-d/go-git.v4/plumbing"
7+
"gopkg.in/src-d/go-git.v4/plumbing/object"
8+
"gopkg.in/src-d/go-mysql-server.v0/sql"
9+
)
10+
11+
// filesTable is the table implementation returned by newFilesTable and
// registered under FilesTableName. It carries no state of its own.
type filesTable struct{}
12+
13+
// FilesSchema is the schema for the files table.
14+
var FilesSchema = sql.Schema{
15+
{Name: "repository_id", Type: sql.Text, Source: "files"},
16+
{Name: "file_path", Type: sql.Text, Source: "files"},
17+
{Name: "blob_hash", Type: sql.Text, Source: "files"},
18+
{Name: "tree_hash", Type: sql.Text, Source: "files"},
19+
{Name: "tree_entry_mode", Type: sql.Text, Source: "files"},
20+
{Name: "blob_content", Type: sql.Blob, Source: "files"},
21+
{Name: "blob_size", Type: sql.Int64, Source: "files"},
22+
}
23+
24+
func newFilesTable() sql.Table {
25+
return new(filesTable)
26+
}
27+
28+
// Compile-time check that *filesTable satisfies
// sql.PushdownProjectionAndFiltersTable.
var _ sql.PushdownProjectionAndFiltersTable = (*filesTable)(nil)
29+
30+
func (filesTable) Resolved() bool { return true }
31+
func (filesTable) Name() string { return FilesTableName }
32+
func (filesTable) Schema() sql.Schema { return FilesSchema }
33+
func (filesTable) Children() []sql.Node { return nil }
34+
35+
func (t *filesTable) TransformExpressionsUp(f sql.TransformExprFunc) (sql.Node, error) {
36+
return t, nil
37+
}
38+
39+
func (t *filesTable) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
40+
return f(t)
41+
}
42+
43+
func (filesTable) RowIter(ctx *sql.Context) (sql.RowIter, error) {
44+
span, ctx := ctx.Span("gitbase.FilesTable")
45+
iter := &filesIter{readContent: true}
46+
47+
repoIter, err := NewRowRepoIter(ctx, iter)
48+
if err != nil {
49+
span.Finish()
50+
return nil, err
51+
}
52+
53+
return sql.NewSpanIter(span, repoIter), nil
54+
}
55+
56+
func (filesTable) HandledFilters(filters []sql.Expression) []sql.Expression {
57+
return handledFilters(FilesTableName, FilesSchema, filters)
58+
}
59+
60+
func (filesTable) WithProjectAndFilters(
61+
ctx *sql.Context,
62+
columns, filters []sql.Expression,
63+
) (sql.RowIter, error) {
64+
span, ctx := ctx.Span("gitbase.FilesTable")
65+
iter, err := rowIterWithSelectors(
66+
ctx, FilesSchema, FilesTableName, filters,
67+
[]string{"repository_id", "blob_hash", "file_path", "tree_hash"},
68+
func(selectors selectors) (RowRepoIter, error) {
69+
repos, err := selectors.textValues("repository_id")
70+
if err != nil {
71+
return nil, err
72+
}
73+
74+
treeHashes, err := selectors.textValues("tree_hash")
75+
if err != nil {
76+
return nil, err
77+
}
78+
79+
blobHashes, err := selectors.textValues("blob_hash")
80+
if err != nil {
81+
return nil, err
82+
}
83+
84+
filePaths, err := selectors.textValues("file_path")
85+
if err != nil {
86+
return nil, err
87+
}
88+
89+
return &filesIter{
90+
repos: repos,
91+
treeHashes: stringsToHashes(treeHashes),
92+
blobHashes: stringsToHashes(blobHashes),
93+
filePaths: filePaths,
94+
readContent: shouldReadContent(columns),
95+
}, nil
96+
},
97+
)
98+
99+
if err != nil {
100+
span.Finish()
101+
return nil, err
102+
}
103+
104+
return sql.NewSpanIter(span, iter), nil
105+
}
106+
107+
func (filesTable) String() string {
108+
return printTable(FilesTableName, FilesSchema)
109+
}
110+
111+
type filesIter struct {
112+
repo *Repository
113+
commits object.CommitIter
114+
seen map[plumbing.Hash]struct{}
115+
files *object.FileIter
116+
treeHash plumbing.Hash
117+
118+
readContent bool
119+
120+
// selectors for faster filtering
121+
repos []string
122+
filePaths []string
123+
blobHashes []plumbing.Hash
124+
treeHashes []plumbing.Hash
125+
}
126+
127+
func (i *filesIter) NewIterator(repo *Repository) (RowRepoIter, error) {
128+
var iter object.CommitIter
129+
if len(i.repos) == 0 || stringContains(i.repos, repo.ID) {
130+
var err error
131+
iter, err = repo.Repo.CommitObjects()
132+
if err != nil {
133+
return nil, err
134+
}
135+
}
136+
137+
return &filesIter{
138+
repo: repo,
139+
commits: iter,
140+
seen: make(map[plumbing.Hash]struct{}),
141+
readContent: i.readContent,
142+
filePaths: i.filePaths,
143+
blobHashes: i.blobHashes,
144+
treeHashes: i.treeHashes,
145+
}, nil
146+
}
147+
148+
func (i *filesIter) shouldVisitTree(hash plumbing.Hash) bool {
149+
if _, ok := i.seen[hash]; ok {
150+
return false
151+
}
152+
153+
if len(i.treeHashes) > 0 && !hashContains(i.treeHashes, hash) {
154+
return false
155+
}
156+
157+
return true
158+
}
159+
160+
func (i *filesIter) shouldVisitFile(file *object.File) bool {
161+
if len(i.filePaths) > 0 && !stringContains(i.filePaths, file.Name) {
162+
return false
163+
}
164+
165+
if len(i.blobHashes) > 0 && !hashContains(i.blobHashes, file.Blob.Hash) {
166+
return false
167+
}
168+
169+
return true
170+
}
171+
172+
func (i *filesIter) Next() (sql.Row, error) {
173+
if i.commits == nil {
174+
return nil, io.EOF
175+
}
176+
177+
for {
178+
if i.files == nil {
179+
for {
180+
commit, err := i.commits.Next()
181+
if err != nil {
182+
return nil, err
183+
}
184+
185+
if !i.shouldVisitTree(commit.TreeHash) {
186+
continue
187+
}
188+
189+
i.treeHash = commit.TreeHash
190+
i.seen[commit.TreeHash] = struct{}{}
191+
192+
if i.files, err = commit.Files(); err != nil {
193+
return nil, err
194+
}
195+
196+
break
197+
}
198+
}
199+
200+
f, err := i.files.Next()
201+
if err != nil {
202+
if err == io.EOF {
203+
i.files = nil
204+
continue
205+
}
206+
}
207+
208+
if !i.shouldVisitFile(f) {
209+
continue
210+
}
211+
212+
return fileToRow(i.repo.ID, i.treeHash, f, i.readContent)
213+
}
214+
}
215+
216+
func (i *filesIter) Close() error {
217+
if i.commits != nil {
218+
i.commits.Close()
219+
}
220+
221+
return nil
222+
}
223+
224+
func fileToRow(
225+
repoID string,
226+
treeHash plumbing.Hash,
227+
file *object.File,
228+
readContent bool,
229+
) (sql.Row, error) {
230+
content, err := blobContent(&file.Blob, readContent)
231+
if err != nil {
232+
return nil, err
233+
}
234+
235+
return sql.NewRow(
236+
repoID,
237+
file.Name,
238+
file.Hash.String(),
239+
treeHash.String(),
240+
file.Mode.String(),
241+
content,
242+
file.Size,
243+
), nil
244+
}

0 commit comments

Comments
 (0)