Skip to content

Commit 9fc1de8

Browse files
authored
Merge pull request #201 from erizocosmico/feature/join-pushdown
*: implement rule to squash chainable tables into a single one
2 parents 4ef52ce + 0f5b308 commit 9fc1de8

File tree

2,486 files changed

+426920
-442
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,486 files changed

+426920
-442
lines changed

Gopkg.lock

Lines changed: 47 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[[constraint]]
22
name = "gopkg.in/src-d/go-mysql-server.v0"
3-
revision = "d48a5ee361f27bb4ad3e135b7435e02e8e5baaa2"
3+
revision = "59b23db1ee89794aa6de1778e8dbb93accff5611"
44

55
[[constraint]]
66
name = "github.com/jessevdk/go-flags"

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ To make some common tasks easier for the user, there are some functions to inter
6363
|is_remote(reference_name)bool| check if the given reference name belongs to a remote |
6464
|is_tag(reference_name)bool| check if the given reference name is a tag |
6565

66+
## Unstable features
67+
68+
- **Table squashing:** there is an optimization that collects inner joins between tables with a set of supported conditions and converts them into a single node that retrieves the data in chained steps (for example, getting the commits first and then the blobs of every commit, instead of joining all commits and all blobs). It can be enabled by setting the environment variable `UNSTABLE_SQUASH_ENABLE` to a non-empty value.
69+
6670
## Examples
6771

6872
### Get all the HEAD references from all the repositories

blobs.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ var (
2727

2828
type blobsTable struct{}
2929

30-
var blobsSchema = sql.Schema{
31-
{Name: "hash", Type: sql.Text, Nullable: false, Source: blobsTableName},
32-
{Name: "size", Type: sql.Int64, Nullable: false, Source: blobsTableName},
33-
{Name: "content", Type: sql.Blob, Nullable: false, Source: blobsTableName},
30+
// BlobsSchema is the schema for the blobs table.
31+
var BlobsSchema = sql.Schema{
32+
{Name: "hash", Type: sql.Text, Nullable: false, Source: BlobsTableName},
33+
{Name: "size", Type: sql.Int64, Nullable: false, Source: BlobsTableName},
34+
{Name: "content", Type: sql.Blob, Nullable: false, Source: BlobsTableName},
3435
}
3536

3637
var _ sql.PushdownProjectionAndFiltersTable = (*blobsTable)(nil)
@@ -39,20 +40,24 @@ func newBlobsTable() sql.Table {
3940
return new(blobsTable)
4041
}
4142

43+
var _ Table = (*blobsTable)(nil)
44+
45+
func (blobsTable) isGitbaseTable() {}
46+
4247
func (blobsTable) String() string {
43-
return printTable(blobsTableName, blobsSchema)
48+
return printTable(BlobsTableName, BlobsSchema)
4449
}
4550

4651
func (blobsTable) Resolved() bool {
4752
return true
4853
}
4954

5055
func (blobsTable) Name() string {
51-
return blobsTableName
56+
return BlobsTableName
5257
}
5358

5459
func (blobsTable) Schema() sql.Schema {
55-
return blobsSchema
60+
return BlobsSchema
5661
}
5762

5863
func (r *blobsTable) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
@@ -79,15 +84,15 @@ func (blobsTable) Children() []sql.Node {
7984
}
8085

8186
func (blobsTable) HandledFilters(filters []sql.Expression) []sql.Expression {
82-
return handledFilters(blobsTableName, blobsSchema, filters)
87+
return handledFilters(BlobsTableName, BlobsSchema, filters)
8388
}
8489

8590
func (r *blobsTable) WithProjectAndFilters(
8691
ctx *sql.Context,
8792
_, filters []sql.Expression,
8893
) (sql.RowIter, error) {
8994
return rowIterWithSelectors(
90-
ctx, blobsSchema, blobsTableName, filters,
95+
ctx, BlobsSchema, BlobsTableName, filters,
9196
[]string{"hash"},
9297
func(selectors selectors) (RowRepoIter, error) {
9398
if len(selectors["hash"]) == 0 {

blobs_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ import (
1111
func TestBlobsTable_Name(t *testing.T) {
1212
require := require.New(t)
1313

14-
table := getTable(require, blobsTableName)
15-
require.Equal(blobsTableName, table.Name())
14+
table := getTable(require, BlobsTableName)
15+
require.Equal(BlobsTableName, table.Name())
1616

1717
// Check that each column source is the same as table name
1818
for _, c := range table.Schema() {
19-
require.Equal(blobsTableName, c.Source)
19+
require.Equal(BlobsTableName, c.Source)
2020
}
2121
}
2222

2323
func TestBlobsTable_Children(t *testing.T) {
2424
require := require.New(t)
2525

26-
table := getTable(require, blobsTableName)
26+
table := getTable(require, BlobsTableName)
2727
require.Equal(0, len(table.Children()))
2828
}
2929

@@ -32,7 +32,7 @@ func TestBlobsTable_RowIter(t *testing.T) {
3232
ctx, _, cleanup := setup(t)
3333
defer cleanup()
3434

35-
table := getTable(require, blobsTableName)
35+
table := getTable(require, BlobsTableName)
3636

3737
rows, err := sql.NodeToRows(ctx, table)
3838
require.NoError(err)
@@ -105,7 +105,7 @@ func TestBlobsPushdown(t *testing.T) {
105105

106106
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
107107
expression.NewEquals(
108-
expression.NewGetFieldWithTable(0, sql.Text, blobsTableName, "hash", false),
108+
expression.NewGetFieldWithTable(0, sql.Text, BlobsTableName, "hash", false),
109109
expression.NewLiteral("32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", sql.Text),
110110
),
111111
})
@@ -117,15 +117,15 @@ func TestBlobsPushdown(t *testing.T) {
117117

118118
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
119119
expression.NewLessThan(
120-
expression.NewGetFieldWithTable(1, sql.Int64, blobsTableName, "size", false),
120+
expression.NewGetFieldWithTable(1, sql.Int64, BlobsTableName, "size", false),
121121
expression.NewLiteral(int64(10), sql.Int64),
122122
),
123123
})
124124
require.NoError(err)
125125

126126
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
127127
expression.NewEquals(
128-
expression.NewGetFieldWithTable(0, sql.Text, blobsTableName, "hash", false),
128+
expression.NewGetFieldWithTable(0, sql.Text, BlobsTableName, "hash", false),
129129
expression.NewLiteral("not exists", sql.Text),
130130
),
131131
})

cmd/gitquery/server.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@ package main
22

33
import (
44
"net"
5+
"os"
56
"strconv"
67

78
"github.com/src-d/gitbase"
89
"github.com/src-d/gitbase/internal/function"
10+
"github.com/src-d/gitbase/internal/rule"
11+
912
sqle "gopkg.in/src-d/go-mysql-server.v0"
1013
"gopkg.in/src-d/go-mysql-server.v0/server"
1114
"gopkg.in/src-d/go-vitess.v0/mysql"
1215
)
1316

17+
// Squashing tables and pushing down join conditions is still a work in
18+
// progress and unstable. To enable it, the UNSTABLE_SQUASH_ENABLE environment variable must
19+
// not be empty.
20+
var enableUnstableSquash = os.Getenv("UNSTABLE_SQUASH_ENABLE") != ""
21+
1422
// CmdServer defines server command
1523
type CmdServer struct {
1624
cmd
@@ -44,6 +52,11 @@ func (c *CmdServer) buildDatabase() error {
4452

4553
c.engine.AddDatabase(gitbase.NewDatabase(c.name))
4654
c.engine.Catalog.RegisterFunctions(function.Functions)
55+
56+
if enableUnstableSquash {
57+
c.engine.Analyzer.AddRule(rule.SquashJoinsRule, rule.SquashJoins)
58+
}
59+
4760
return nil
4861
}
4962

@@ -60,9 +73,11 @@ func (c *CmdServer) Execute(args []string) error {
6073

6174
hostString := net.JoinHostPort(c.Host, strconv.Itoa(c.Port))
6275
s, err := server.NewServer(
63-
"tcp",
64-
hostString,
65-
auth,
76+
server.Config{
77+
Protocol: "tcp",
78+
Address: hostString,
79+
Auth: auth,
80+
},
6681
c.engine,
6782
gitbase.NewSessionBuilder(c.pool),
6883
)

commits.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,17 @@ import (
1212
type commitsTable struct {
1313
}
1414

15-
var commitsSchema = sql.Schema{
16-
{Name: "hash", Type: sql.Text, Nullable: false, Source: commitsTableName},
17-
{Name: "author_name", Type: sql.Text, Nullable: false, Source: commitsTableName},
18-
{Name: "author_email", Type: sql.Text, Nullable: false, Source: commitsTableName},
19-
{Name: "author_when", Type: sql.Timestamp, Nullable: false, Source: commitsTableName},
20-
{Name: "committer_name", Type: sql.Text, Nullable: false, Source: commitsTableName},
21-
{Name: "committer_email", Type: sql.Text, Nullable: false, Source: commitsTableName},
22-
{Name: "committer_when", Type: sql.Timestamp, Nullable: false, Source: commitsTableName},
23-
{Name: "message", Type: sql.Text, Nullable: false, Source: commitsTableName},
24-
{Name: "tree_hash", Type: sql.Text, Nullable: false, Source: commitsTableName},
15+
// CommitsSchema is the schema for the commits table.
16+
var CommitsSchema = sql.Schema{
17+
{Name: "hash", Type: sql.Text, Nullable: false, Source: CommitsTableName},
18+
{Name: "author_name", Type: sql.Text, Nullable: false, Source: CommitsTableName},
19+
{Name: "author_email", Type: sql.Text, Nullable: false, Source: CommitsTableName},
20+
{Name: "author_when", Type: sql.Timestamp, Nullable: false, Source: CommitsTableName},
21+
{Name: "committer_name", Type: sql.Text, Nullable: false, Source: CommitsTableName},
22+
{Name: "committer_email", Type: sql.Text, Nullable: false, Source: CommitsTableName},
23+
{Name: "committer_when", Type: sql.Timestamp, Nullable: false, Source: CommitsTableName},
24+
{Name: "message", Type: sql.Text, Nullable: false, Source: CommitsTableName},
25+
{Name: "tree_hash", Type: sql.Text, Nullable: false, Source: CommitsTableName},
2526
}
2627

2728
var _ sql.PushdownProjectionAndFiltersTable = (*commitsTable)(nil)
@@ -30,20 +31,24 @@ func newCommitsTable() sql.Table {
3031
return new(commitsTable)
3132
}
3233

34+
var _ Table = (*commitsTable)(nil)
35+
36+
func (commitsTable) isGitbaseTable() {}
37+
3338
func (commitsTable) String() string {
34-
return printTable(commitsTableName, commitsSchema)
39+
return printTable(CommitsTableName, CommitsSchema)
3540
}
3641

3742
func (commitsTable) Resolved() bool {
3843
return true
3944
}
4045

4146
func (commitsTable) Name() string {
42-
return commitsTableName
47+
return CommitsTableName
4348
}
4449

4550
func (commitsTable) Schema() sql.Schema {
46-
return commitsSchema
51+
return CommitsSchema
4752
}
4853

4954
func (r *commitsTable) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
@@ -70,15 +75,15 @@ func (commitsTable) Children() []sql.Node {
7075
}
7176

7277
func (commitsTable) HandledFilters(filters []sql.Expression) []sql.Expression {
73-
return handledFilters(commitsTableName, commitsSchema, filters)
78+
return handledFilters(CommitsTableName, CommitsSchema, filters)
7479
}
7580

7681
func (r *commitsTable) WithProjectAndFilters(
7782
ctx *sql.Context,
7883
_, filters []sql.Expression,
7984
) (sql.RowIter, error) {
8085
return rowIterWithSelectors(
81-
ctx, commitsSchema, commitsTableName, filters,
86+
ctx, CommitsSchema, CommitsTableName, filters,
8287
[]string{"hash"},
8388
func(selectors selectors) (RowRepoIter, error) {
8489
if len(selectors["hash"]) == 0 {

0 commit comments

Comments
 (0)