Skip to content

Commit 579894a

Browse files
committed
*: implement rule to squash chainable tables into a single one
Fixes #197 This commit introduces a new rule to squash inner joins between tables that are chainable. This rule is still experimental, so it is only enabled when the `UNSTABLE_SQUASH_ENABLE` environment variable is set, until it is proven to be stable and its results match the ones retrieved without this optimisation. What is a join between chainable tables? It is an inner join between two tables that can be joined using some logic in which one table's records are retrieved from some data in the other table's rows, because there is some sort of relationship between them. To assert that such logic can be applied, the join needs to have certain filters in its condition. - `repositories` is chainable with `remotes` iff `repositories.id = remotes.repository_id` - `repositories` is chainable with `refs` iff `repositories.id = refs.repository_id` - `remotes` is chainable with `refs` iff `refs.repository_id = remotes.repository_id` - `refs` is chainable with `commits` iff `refs.hash = commits.hash`, `history_idx(refs.hash, commits.hash) >= 0` or `0 <= history_idx(refs.hash, commits.hash)` - `refs` is chainable with `tree_entries` iff `commit_has_tree(refs.hash, tree_entries.tree_hash)` - `refs` is chainable with `blobs` iff `commit_has_blob(refs.hash, blobs.hash)` - `commits` is chainable with `tree_entries` iff `commit_has_tree(commits.hash, tree_entries.tree_hash)` or `commits.tree_hash = tree_entries.tree_hash` - `commits` is chainable with `blobs` iff `commit_has_blob(commits.hash, blobs.hash)` - `tree_entries` is chainable with `blobs` iff `tree_entries.entry_hash = blobs.hash` Since chains are built hierarchically following the table hierarchy (repositories -> remotes -> refs -> commits -> tree_entries -> blobs), any chain can be joined with any table iff the last part of the chain and that table match the conditions stated above.
To allow this, there are several iterators, some of them for the same table depending on that table's position in the chain, which are composed in the analyzer rule. Even though the chain is built hierarchically and thus has a different schema ordering than the original schema, a mapping between the chain schema and the original one is computed so that the result of the chain is exactly the same as the one without the optimisation. Notes about the experimental status of this feature: - Lacks some integration tests that will be done on subsequent PRs. - Tree entries table is currently under examination and depending on what we decide, the results it returns right now might need to change. - Probably better to test it thoroughly and do some performance measurements before making this the default behaviour. Signed-off-by: Miguel Molina <[email protected]>
1 parent edca944 commit 579894a

21 files changed

+4077
-146
lines changed

blobs.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ var (
2727

2828
type blobsTable struct{}
2929

30-
var blobsSchema = sql.Schema{
31-
{Name: "hash", Type: sql.Text, Nullable: false, Source: blobsTableName},
32-
{Name: "size", Type: sql.Int64, Nullable: false, Source: blobsTableName},
33-
{Name: "content", Type: sql.Blob, Nullable: false, Source: blobsTableName},
30+
// BlobsSchema is the schema for the blobs table.
31+
var BlobsSchema = sql.Schema{
32+
{Name: "hash", Type: sql.Text, Nullable: false, Source: BlobsTableName},
33+
{Name: "size", Type: sql.Int64, Nullable: false, Source: BlobsTableName},
34+
{Name: "content", Type: sql.Blob, Nullable: false, Source: BlobsTableName},
3435
}
3536

3637
var _ sql.PushdownProjectionAndFiltersTable = (*blobsTable)(nil)
@@ -39,20 +40,24 @@ func newBlobsTable() sql.Table {
3940
return new(blobsTable)
4041
}
4142

43+
var _ Table = (*blobsTable)(nil)
44+
45+
func (blobsTable) isGitbaseTable() {}
46+
4247
func (blobsTable) String() string {
43-
return printTable(blobsTableName, blobsSchema)
48+
return printTable(BlobsTableName, BlobsSchema)
4449
}
4550

4651
func (blobsTable) Resolved() bool {
4752
return true
4853
}
4954

5055
func (blobsTable) Name() string {
51-
return blobsTableName
56+
return BlobsTableName
5257
}
5358

5459
func (blobsTable) Schema() sql.Schema {
55-
return blobsSchema
60+
return BlobsSchema
5661
}
5762

5863
func (r *blobsTable) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
@@ -79,15 +84,15 @@ func (blobsTable) Children() []sql.Node {
7984
}
8085

8186
func (blobsTable) HandledFilters(filters []sql.Expression) []sql.Expression {
82-
return handledFilters(blobsTableName, blobsSchema, filters)
87+
return handledFilters(BlobsTableName, BlobsSchema, filters)
8388
}
8489

8590
func (r *blobsTable) WithProjectAndFilters(
8691
ctx *sql.Context,
8792
_, filters []sql.Expression,
8893
) (sql.RowIter, error) {
8994
return rowIterWithSelectors(
90-
ctx, blobsSchema, blobsTableName, filters,
95+
ctx, BlobsSchema, BlobsTableName, filters,
9196
[]string{"hash"},
9297
func(selectors selectors) (RowRepoIter, error) {
9398
if len(selectors["hash"]) == 0 {

blobs_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ import (
1111
func TestBlobsTable_Name(t *testing.T) {
1212
require := require.New(t)
1313

14-
table := getTable(require, blobsTableName)
15-
require.Equal(blobsTableName, table.Name())
14+
table := getTable(require, BlobsTableName)
15+
require.Equal(BlobsTableName, table.Name())
1616

1717
// Check that each column source is the same as table name
1818
for _, c := range table.Schema() {
19-
require.Equal(blobsTableName, c.Source)
19+
require.Equal(BlobsTableName, c.Source)
2020
}
2121
}
2222

2323
func TestBlobsTable_Children(t *testing.T) {
2424
require := require.New(t)
2525

26-
table := getTable(require, blobsTableName)
26+
table := getTable(require, BlobsTableName)
2727
require.Equal(0, len(table.Children()))
2828
}
2929

@@ -32,7 +32,7 @@ func TestBlobsTable_RowIter(t *testing.T) {
3232
ctx, _, cleanup := setup(t)
3333
defer cleanup()
3434

35-
table := getTable(require, blobsTableName)
35+
table := getTable(require, BlobsTableName)
3636

3737
rows, err := sql.NodeToRows(ctx, table)
3838
require.NoError(err)
@@ -105,7 +105,7 @@ func TestBlobsPushdown(t *testing.T) {
105105

106106
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
107107
expression.NewEquals(
108-
expression.NewGetFieldWithTable(0, sql.Text, blobsTableName, "hash", false),
108+
expression.NewGetFieldWithTable(0, sql.Text, BlobsTableName, "hash", false),
109109
expression.NewLiteral("32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", sql.Text),
110110
),
111111
})
@@ -117,15 +117,15 @@ func TestBlobsPushdown(t *testing.T) {
117117

118118
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
119119
expression.NewLessThan(
120-
expression.NewGetFieldWithTable(1, sql.Int64, blobsTableName, "size", false),
120+
expression.NewGetFieldWithTable(1, sql.Int64, BlobsTableName, "size", false),
121121
expression.NewLiteral(int64(10), sql.Int64),
122122
),
123123
})
124124
require.NoError(err)
125125

126126
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
127127
expression.NewEquals(
128-
expression.NewGetFieldWithTable(0, sql.Text, blobsTableName, "hash", false),
128+
expression.NewGetFieldWithTable(0, sql.Text, BlobsTableName, "hash", false),
129129
expression.NewLiteral("not exists", sql.Text),
130130
),
131131
})

cmd/gitquery/server.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@ package main
22

33
import (
44
"net"
5+
"os"
56
"strconv"
67

78
"github.com/src-d/gitbase"
89
"github.com/src-d/gitbase/internal/function"
10+
"github.com/src-d/gitbase/internal/rule"
11+
912
sqle "gopkg.in/src-d/go-mysql-server.v0"
1013
"gopkg.in/src-d/go-mysql-server.v0/server"
1114
"gopkg.in/src-d/go-vitess.v0/mysql"
1215
)
1316

17+
// Squashing tables and pushing down join conditions is still a work in
18+
// progress and unstable. To enable it, the UNSTABLE_SQUASH_ENABLE environment variable must
19+
// not be empty.
20+
var enableUnstableSquash = os.Getenv("UNSTABLE_SQUASH_ENABLE") != ""
21+
1422
// CmdServer defines server command
1523
type CmdServer struct {
1624
cmd
@@ -44,6 +52,11 @@ func (c *CmdServer) buildDatabase() error {
4452

4553
c.engine.AddDatabase(gitbase.NewDatabase(c.name))
4654
c.engine.Catalog.RegisterFunctions(function.Functions)
55+
56+
if enableUnstableSquash {
57+
c.engine.Analyzer.AddRule(rule.SquashJoinsRule, rule.SquashJoins)
58+
}
59+
4760
return nil
4861
}
4962

commits.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,17 @@ import (
1212
type commitsTable struct {
1313
}
1414

15-
var commitsSchema = sql.Schema{
16-
{Name: "hash", Type: sql.Text, Nullable: false, Source: commitsTableName},
17-
{Name: "author_name", Type: sql.Text, Nullable: false, Source: commitsTableName},
18-
{Name: "author_email", Type: sql.Text, Nullable: false, Source: commitsTableName},
19-
{Name: "author_when", Type: sql.Timestamp, Nullable: false, Source: commitsTableName},
20-
{Name: "committer_name", Type: sql.Text, Nullable: false, Source: commitsTableName},
21-
{Name: "committer_email", Type: sql.Text, Nullable: false, Source: commitsTableName},
22-
{Name: "committer_when", Type: sql.Timestamp, Nullable: false, Source: commitsTableName},
23-
{Name: "message", Type: sql.Text, Nullable: false, Source: commitsTableName},
24-
{Name: "tree_hash", Type: sql.Text, Nullable: false, Source: commitsTableName},
15+
// CommitsSchema is the schema for the commits table.
16+
var CommitsSchema = sql.Schema{
17+
{Name: "hash", Type: sql.Text, Nullable: false, Source: CommitsTableName},
18+
{Name: "author_name", Type: sql.Text, Nullable: false, Source: CommitsTableName},
19+
{Name: "author_email", Type: sql.Text, Nullable: false, Source: CommitsTableName},
20+
{Name: "author_when", Type: sql.Timestamp, Nullable: false, Source: CommitsTableName},
21+
{Name: "committer_name", Type: sql.Text, Nullable: false, Source: CommitsTableName},
22+
{Name: "committer_email", Type: sql.Text, Nullable: false, Source: CommitsTableName},
23+
{Name: "committer_when", Type: sql.Timestamp, Nullable: false, Source: CommitsTableName},
24+
{Name: "message", Type: sql.Text, Nullable: false, Source: CommitsTableName},
25+
{Name: "tree_hash", Type: sql.Text, Nullable: false, Source: CommitsTableName},
2526
}
2627

2728
var _ sql.PushdownProjectionAndFiltersTable = (*commitsTable)(nil)
@@ -30,20 +31,24 @@ func newCommitsTable() sql.Table {
3031
return new(commitsTable)
3132
}
3233

34+
var _ Table = (*commitsTable)(nil)
35+
36+
func (commitsTable) isGitbaseTable() {}
37+
3338
func (commitsTable) String() string {
34-
return printTable(commitsTableName, commitsSchema)
39+
return printTable(CommitsTableName, CommitsSchema)
3540
}
3641

3742
func (commitsTable) Resolved() bool {
3843
return true
3944
}
4045

4146
func (commitsTable) Name() string {
42-
return commitsTableName
47+
return CommitsTableName
4348
}
4449

4550
func (commitsTable) Schema() sql.Schema {
46-
return commitsSchema
51+
return CommitsSchema
4752
}
4853

4954
func (r *commitsTable) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
@@ -70,15 +75,15 @@ func (commitsTable) Children() []sql.Node {
7075
}
7176

7277
func (commitsTable) HandledFilters(filters []sql.Expression) []sql.Expression {
73-
return handledFilters(commitsTableName, commitsSchema, filters)
78+
return handledFilters(CommitsTableName, CommitsSchema, filters)
7479
}
7580

7681
func (r *commitsTable) WithProjectAndFilters(
7782
ctx *sql.Context,
7883
_, filters []sql.Expression,
7984
) (sql.RowIter, error) {
8085
return rowIterWithSelectors(
81-
ctx, commitsSchema, commitsTableName, filters,
86+
ctx, CommitsSchema, CommitsTableName, filters,
8287
[]string{"hash"},
8388
func(selectors selectors) (RowRepoIter, error) {
8489
if len(selectors["hash"]) == 0 {

commits_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ import (
1111
func TestCommitsTable_Name(t *testing.T) {
1212
require := require.New(t)
1313

14-
table := getTable(require, commitsTableName)
15-
require.Equal(commitsTableName, table.Name())
14+
table := getTable(require, CommitsTableName)
15+
require.Equal(CommitsTableName, table.Name())
1616

1717
// Check that each column source is the same as table name
1818
for _, c := range table.Schema() {
19-
require.Equal(commitsTableName, c.Source)
19+
require.Equal(CommitsTableName, c.Source)
2020
}
2121
}
2222

2323
func TestCommitsTable_Children(t *testing.T) {
2424
require := require.New(t)
2525

26-
table := getTable(require, commitsTableName)
26+
table := getTable(require, CommitsTableName)
2727
require.Equal(0, len(table.Children()))
2828
}
2929

@@ -32,7 +32,7 @@ func TestCommitsTable_RowIter(t *testing.T) {
3232
session, _, cleanup := setup(t)
3333
defer cleanup()
3434

35-
table := getTable(require, commitsTableName)
35+
table := getTable(require, CommitsTableName)
3636

3737
rows, err := sql.NodeToRows(session, table)
3838
require.Nil(err)
@@ -61,7 +61,7 @@ func TestCommitsPushdown(t *testing.T) {
6161

6262
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
6363
expression.NewEquals(
64-
expression.NewGetFieldWithTable(0, sql.Text, commitsTableName, "hash", false),
64+
expression.NewGetFieldWithTable(0, sql.Text, CommitsTableName, "hash", false),
6565
expression.NewLiteral("918c48b83bd081e863dbe1b80f8998f058cd8294", sql.Text),
6666
),
6767
})
@@ -73,7 +73,7 @@ func TestCommitsPushdown(t *testing.T) {
7373

7474
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
7575
expression.NewEquals(
76-
expression.NewGetFieldWithTable(0, sql.Text, commitsTableName, "hash", false),
76+
expression.NewGetFieldWithTable(0, sql.Text, CommitsTableName, "hash", false),
7777
expression.NewLiteral("not exists", sql.Text),
7878
),
7979
})
@@ -85,7 +85,7 @@ func TestCommitsPushdown(t *testing.T) {
8585

8686
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
8787
expression.NewEquals(
88-
expression.NewGetFieldWithTable(2, sql.Text, commitsTableName, "author_email", false),
88+
expression.NewGetFieldWithTable(2, sql.Text, CommitsTableName, "author_email", false),
8989
expression.NewLiteral("[email protected]", sql.Text),
9090
),
9191
})
@@ -97,11 +97,11 @@ func TestCommitsPushdown(t *testing.T) {
9797

9898
iter, err = table.WithProjectAndFilters(session, nil, []sql.Expression{
9999
expression.NewEquals(
100-
expression.NewGetFieldWithTable(2, sql.Text, commitsTableName, "author_email", false),
100+
expression.NewGetFieldWithTable(2, sql.Text, CommitsTableName, "author_email", false),
101101
expression.NewLiteral("[email protected]", sql.Text),
102102
),
103103
expression.NewEquals(
104-
expression.NewGetFieldWithTable(7, sql.Text, commitsTableName, "message", false),
104+
expression.NewGetFieldWithTable(7, sql.Text, CommitsTableName, "message", false),
105105
expression.NewLiteral("vendor stuff\n", sql.Text),
106106
),
107107
})

database.go

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,18 @@ import (
55
)
66

77
const (
8-
// TODO 'references' is a reserved keyword into the parser
9-
referencesTableName = "refs"
10-
commitsTableName = "commits"
11-
blobsTableName = "blobs"
12-
treeEntriesTableName = "tree_entries"
13-
repositoriesTableName = "repositories"
14-
remotesTableName = "remotes"
8+
// ReferencesTableName is the name of the refs table.
9+
ReferencesTableName = "refs"
10+
// CommitsTableName is the name of the commits table.
11+
CommitsTableName = "commits"
12+
// BlobsTableName is the name of the blobs table.
13+
BlobsTableName = "blobs"
14+
// TreeEntriesTableName is the name of the tree entries table.
15+
TreeEntriesTableName = "tree_entries"
16+
// RepositoriesTableName is the name of the repositories table.
17+
RepositoriesTableName = "repositories"
18+
// RemotesTableName is the name of the remotes table.
19+
RemotesTableName = "remotes"
1520
)
1621

1722
// Database holds all git repository tables
@@ -47,11 +52,11 @@ func (d *Database) Name() string {
4752
// Tables returns a map with all initialized tables
4853
func (d *Database) Tables() map[string]sql.Table {
4954
return map[string]sql.Table{
50-
commitsTableName: d.commits,
51-
referencesTableName: d.references,
52-
blobsTableName: d.blobs,
53-
treeEntriesTableName: d.treeEntries,
54-
repositoriesTableName: d.repositories,
55-
remotesTableName: d.remotes,
55+
CommitsTableName: d.commits,
56+
ReferencesTableName: d.references,
57+
BlobsTableName: d.blobs,
58+
TreeEntriesTableName: d.treeEntries,
59+
RepositoriesTableName: d.repositories,
60+
RemotesTableName: d.remotes,
5661
}
5762
}

database_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ func TestDatabase_Tables(t *testing.T) {
3131

3232
sort.Strings(tableNames)
3333
expected := []string{
34-
commitsTableName,
35-
referencesTableName,
36-
treeEntriesTableName,
37-
blobsTableName,
38-
repositoriesTableName,
39-
remotesTableName,
34+
CommitsTableName,
35+
ReferencesTableName,
36+
TreeEntriesTableName,
37+
BlobsTableName,
38+
RepositoriesTableName,
39+
RemotesTableName,
4040
}
4141
sort.Strings(expected)
4242

0 commit comments

Comments
 (0)