Skip to content

Commit 16f7f4f

Browse files
authored
Merge pull request #219 from erizocosmico/feature/dont-read-blob-content
*: don't read blob content if it's not used
2 parents 9ac9f0b + 40b26c3 commit 16f7f4f

File tree

6 files changed

+158
-49
lines changed

6 files changed

+158
-49
lines changed

blobs.go

Lines changed: 37 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import (
66
"io/ioutil"
77

88
"gopkg.in/src-d/go-mysql-server.v0/sql"
9+
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
910

1011
"gopkg.in/src-d/go-git.v4/plumbing"
1112
"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -70,7 +71,7 @@ func (r *blobsTable) TransformExpressionsUp(f sql.TransformExprFunc) (sql.Node,
7071

7172
func (r blobsTable) RowIter(ctx *sql.Context) (sql.RowIter, error) {
7273
span, ctx := ctx.Span("gitbase.BlobsTable")
73-
iter := new(blobIter)
74+
iter := &blobIter{readContent: true}
7475

7576
repoIter, err := NewRowRepoIter(ctx, iter)
7677
if err != nil {
@@ -91,23 +92,26 @@ func (blobsTable) HandledFilters(filters []sql.Expression) []sql.Expression {
9192

9293
func (r *blobsTable) WithProjectAndFilters(
9394
ctx *sql.Context,
94-
_, filters []sql.Expression,
95+
columns, filters []sql.Expression,
9596
) (sql.RowIter, error) {
9697
span, ctx := ctx.Span("gitbase.BlobsTable")
9798
iter, err := rowIterWithSelectors(
9899
ctx, BlobsSchema, BlobsTableName, filters,
99100
[]string{"hash"},
100101
func(selectors selectors) (RowRepoIter, error) {
101102
if len(selectors["hash"]) == 0 {
102-
return new(blobIter), nil
103+
return &blobIter{readContent: shouldReadContent(columns)}, nil
103104
}
104105

105106
hashes, err := selectors.textValues("hash")
106107
if err != nil {
107108
return nil, err
108109
}
109110

110-
return &blobsByHashIter{hashes: hashes}, nil
111+
return &blobsByHashIter{
112+
hashes: hashes,
113+
readContent: shouldReadContent(columns),
114+
}, nil
111115
},
112116
)
113117

@@ -120,7 +124,8 @@ func (r *blobsTable) WithProjectAndFilters(
120124
}
121125

122126
type blobIter struct {
123-
iter *object.BlobIter
127+
iter *object.BlobIter
128+
readContent bool
124129
}
125130

126131
func (i *blobIter) NewIterator(repo *Repository) (RowRepoIter, error) {
@@ -129,7 +134,7 @@ func (i *blobIter) NewIterator(repo *Repository) (RowRepoIter, error) {
129134
return nil, err
130135
}
131136

132-
return &blobIter{iter: iter}, nil
137+
return &blobIter{iter: iter, readContent: i.readContent}, nil
133138
}
134139

135140
func (i *blobIter) Next() (sql.Row, error) {
@@ -138,7 +143,7 @@ func (i *blobIter) Next() (sql.Row, error) {
138143
return nil, err
139144
}
140145

141-
return blobToRow(o)
146+
return blobToRow(o, i.readContent)
142147
}
143148

144149
func (i *blobIter) Close() error {
@@ -150,13 +155,14 @@ func (i *blobIter) Close() error {
150155
}
151156

152157
type blobsByHashIter struct {
153-
repo *Repository
154-
pos int
155-
hashes []string
158+
repo *Repository
159+
pos int
160+
hashes []string
161+
readContent bool
156162
}
157163

158164
func (i *blobsByHashIter) NewIterator(repo *Repository) (RowRepoIter, error) {
159-
return &blobsByHashIter{repo, 0, i.hashes}, nil
165+
return &blobsByHashIter{repo, 0, i.hashes, i.readContent}, nil
160166
}
161167

162168
func (i *blobsByHashIter) Next() (sql.Row, error) {
@@ -176,26 +182,26 @@ func (i *blobsByHashIter) Next() (sql.Row, error) {
176182
return nil, err
177183
}
178184

179-
return blobToRow(blob)
185+
return blobToRow(blob, i.readContent)
180186
}
181187
}
182188

183189
func (i *blobsByHashIter) Close() error {
184190
return nil
185191
}
186192

187-
func blobToRow(c *object.Blob) (sql.Row, error) {
193+
func blobToRow(c *object.Blob, readContent bool) (sql.Row, error) {
188194
var content []byte
189195
var isAllowed = blobsAllowBinary
190-
if !isAllowed {
196+
if !isAllowed && readContent {
191197
ok, err := isBinary(c)
192198
if err != nil {
193199
return nil, err
194200
}
195201
isAllowed = !ok
196202
}
197203

198-
if c.Size <= int64(blobsMaxSize) && isAllowed {
204+
if c.Size <= int64(blobsMaxSize) && isAllowed && readContent {
199205
r, err := c.Reader()
200206
if err != nil {
201207
return nil, err
@@ -248,3 +254,19 @@ func isBinary(blob *object.Blob) (bool, error) {
248254
}
249255
}
250256
}
257+
258+
func shouldReadContent(columns []sql.Expression) bool {
259+
for _, e := range columns {
260+
var found bool
261+
expression.Inspect(e, func(e sql.Expression) bool {
262+
gf, ok := e.(*expression.GetField)
263+
found = ok && gf.Table() == BlobsTableName && gf.Name() == "content"
264+
return !found
265+
})
266+
267+
if found {
268+
return true
269+
}
270+
}
271+
return false
272+
}

integration_test.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -229,10 +229,10 @@ func TestSquashCorrectness(t *testing.T) {
229229
`SELECT * FROM tree_entries te INNER JOIN blobs b ON te.entry_hash = b.hash`,
230230

231231
`SELECT * FROM repositories r
232-
INNER JOIN refs re
232+
INNER JOIN refs re
233233
ON r.id = re.repository_id
234-
INNER JOIN commits c
235-
ON re.hash = c.hash
234+
INNER JOIN commits c
235+
ON re.hash = c.hash
236236
WHERE re.name = 'HEAD'`,
237237

238238
`SELECT * FROM commits c

internal/rule/squashjoins.go

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,14 @@ func buildSquashedTable(
312312
return nil, errInvalidIteratorChain.New("tree_entries", iter)
313313
}
314314
case gitbase.BlobsTableName:
315+
var readContent bool
316+
for _, e := range columns {
317+
if containsField(e, gitbase.BlobsTableName, "content") {
318+
readContent = true
319+
break
320+
}
321+
}
322+
315323
switch it := iter.(type) {
316324
case gitbase.RefsIter:
317325
var f sql.Expression
@@ -328,6 +336,7 @@ func buildSquashedTable(
328336
iter = gitbase.NewCommitBlobsIter(
329337
gitbase.NewRefHEADCommitsIter(it, nil, true),
330338
f,
339+
readContent,
331340
)
332341
case gitbase.CommitsIter:
333342
var f sql.Expression
@@ -341,13 +350,10 @@ func buildSquashedTable(
341350
return nil, err
342351
}
343352

344-
iter = gitbase.NewTreeEntryBlobsIter(
345-
gitbase.NewCommitMainTreeEntriesIter(
346-
it,
347-
nil,
348-
true,
349-
),
353+
iter = gitbase.NewCommitBlobsIter(
354+
it,
350355
f,
356+
readContent,
351357
)
352358
case gitbase.TreeEntriesIter:
353359
var f sql.Expression
@@ -361,7 +367,7 @@ func buildSquashedTable(
361367
return nil, err
362368
}
363369

364-
iter = gitbase.NewTreeEntryBlobsIter(it, f)
370+
iter = gitbase.NewTreeEntryBlobsIter(it, f, readContent)
365371
default:
366372
return nil, errInvalidIteratorChain.New("blobs", iter)
367373
}
@@ -936,6 +942,19 @@ func isNum(n int64) validator {
936942
}
937943
}
938944

945+
func containsField(e sql.Expression, table, name string) bool {
946+
var found bool
947+
expression.Inspect(e, func(e sql.Expression) bool {
948+
gf, ok := e.(*expression.GetField)
949+
if ok && gf.Table() == table && gf.Name() == name {
950+
found = true
951+
return false
952+
}
953+
return true
954+
})
955+
return found
956+
}
957+
939958
func fixFieldIndexes(e sql.Expression, schema sql.Schema) (sql.Expression, error) {
940959
if e == nil {
941960
return nil, nil

0 commit comments

Comments
 (0)